Implementation of basic data flow analysis for PowerPC (#212)

* WIP implementation

* * Move flow analysis to dedicated file
* Show string constants inline
* Handle calls to MWCC "sled" helpers which otherwise disrupt flow analysis

* Run cargo insta review

* Apply clippy feedback

* Update more tests.

* Remove std use from ppc flow analysis

* Try to make wasm build work again

* More test changes

* Probably last wasm fix

* Formatting

* Fix WASM

* One more clippy thing

* Fixed display of float constants in a LFS or LFD instruction in case where there is a branch to the subsequent instruction with a different register value.

* On lines with a reloc, only hide Symbol type data flow values rather than all data flow values.

* Formatting
This commit is contained in:
Mark Langen 2025-06-17 11:59:04 -07:00 committed by GitHub
parent f58616b6dd
commit e638d0b17a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 870 additions and 66 deletions

View File

@ -570,6 +570,7 @@ impl FunctionDiffUi {
DiffTextColor::Normal => Color::Gray,
DiffTextColor::Dim => Color::DarkGray,
DiffTextColor::Bright => Color::White,
DiffTextColor::DataFlow => Color::LightCyan,
DiffTextColor::Replace => Color::Cyan,
DiffTextColor::Delete => Color::Red,
DiffTextColor::Insert => Color::Green,

View File

@ -175,7 +175,7 @@ time = { version = "0.3", optional = true }
encoding_rs = { version = "0.8.35", optional = true }
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", optional = true }
winapi = { version = "0.3", optional = true, features = ["winbase"] }
# For Linux static binaries, use rustls
[target.'cfg(target_os = "linux")'.dependencies]

View File

@ -25,6 +25,20 @@
}
]
},
{
"id": "analyzeDataFlow",
"type": "boolean",
"default": false,
"name": "(Experimental) Perform data flow analysis",
"description": "Use data flow analysis to display known information about register contents where possible"
},
{
"id": "showDataFlow",
"type": "boolean",
"default": true,
"name": "Show data flow",
"description": "Show data flow analysis results in place of register name where present"
},
{
"id": "spaceBetweenArgs",
"type": "boolean",
@ -264,7 +278,8 @@
"id": "ppc",
"name": "PowerPC",
"properties": [
"ppc.calculatePoolRelocations"
"ppc.calculatePoolRelocations",
"analyzeDataFlow"
]
},
{

View File

@ -1,5 +1,8 @@
use alloc::{borrow::Cow, boxed::Box, format, string::String, vec::Vec};
use core::{ffi::CStr, fmt, fmt::Debug};
use core::{
ffi::CStr,
fmt::{self, Debug},
};
use anyhow::{Result, bail};
use encoding_rs::SHIFT_JIS;
@ -11,8 +14,9 @@ use crate::{
display::{ContextItem, HoverItem, InstructionPart},
},
obj::{
InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, SymbolKind,
FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation,
RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet,
SymbolKind,
},
util::ReallySigned,
};
@ -31,6 +35,7 @@ pub mod superh;
pub mod x86;
/// Represents the type of data associated with an instruction
#[derive(PartialEq)]
pub enum DataType {
Int8,
Int16,
@ -335,6 +340,17 @@ pub trait Arch: Send + Sync + Debug {
Vec::new()
}
// Perform detailed data flow analysis
fn data_flow_analysis(
&self,
_obj: &Object,
_symbol: &Symbol,
_code: &[u8],
_relocations: &[Relocation],
) -> Option<Box<dyn FlowAnalysisResult>> {
None
}
fn implcit_addend(
&self,
file: &object::File<'_>,

View File

@ -0,0 +1,642 @@
use alloc::{
boxed::Box,
collections::{BTreeMap, BTreeSet},
format,
string::{String, ToString},
vec::Vec,
};
use core::{
ffi::CStr,
ops::{Index, IndexMut},
};
use itertools::Itertools;
use ppc750cl::Simm;
use crate::{
arch::DataType,
obj::{FlowAnalysisResult, FlowAnalysisValue, Object, Relocation, Symbol},
util::{RawDouble, RawFloat},
};
fn is_store_instruction(op: ppc750cl::Opcode) -> bool {
use ppc750cl::Opcode;
matches!(
op,
Opcode::Stbux
| Opcode::Stbx
| Opcode::Stfdux
| Opcode::Stfdx
| Opcode::Stfiwx
| Opcode::Stfsux
| Opcode::Stfsx
| Opcode::Sthbrx
| Opcode::Sthux
| Opcode::Sthx
| Opcode::Stswi
| Opcode::Stswx
| Opcode::Stwbrx
| Opcode::Stwcx_
| Opcode::Stwux
| Opcode::Stwx
| Opcode::Stwu
| Opcode::Stb
| Opcode::Stbu
| Opcode::Sth
| Opcode::Sthu
| Opcode::Stmw
| Opcode::Stfs
| Opcode::Stfsu
| Opcode::Stfd
| Opcode::Stfdu
)
}
pub fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option<DataType> {
use ppc750cl::Opcode;
match inst_op {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
}
#[derive(Default, PartialEq, Eq, Copy, Clone, Debug, PartialOrd, Ord)]
enum RegisterContent {
#[default]
Unknown,
Variable, // Multiple potential values
FloatConstant(RawFloat),
DoubleConstant(RawDouble),
IntConstant(i32),
InputRegister(u8),
Symbol(usize),
}
impl core::fmt::Display for RegisterContent {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
RegisterContent::Unknown => write!(f, "unknown"),
RegisterContent::Variable => write!(f, "variable"),
RegisterContent::IntConstant(i) =>
// -i is safe because it's at most a 16 bit constant in the i32
{
if *i >= 0 {
write!(f, "0x{:x}", i)
} else {
write!(f, "-0x{:x}", -i)
}
}
RegisterContent::FloatConstant(RawFloat(fp)) => write!(f, "{fp:?}f"),
RegisterContent::DoubleConstant(RawDouble(fp)) => write!(f, "{fp:?}d"),
RegisterContent::InputRegister(p) => write!(f, "input{p}"),
RegisterContent::Symbol(_u) => write!(f, "relocation"),
}
}
}
#[derive(Clone, PartialEq, Eq, Ord, PartialOrd)]
struct RegisterState {
gpr: [RegisterContent; 32],
fpr: [RegisterContent; 32],
}
impl RegisterState {
fn new() -> Self {
RegisterState { gpr: [RegisterContent::Unknown; 32], fpr: [RegisterContent::Unknown; 32] }
}
// During a function call, these registers must be assumed trashed.
fn clear_volatile(&mut self) {
self[ppc750cl::GPR(0)] = RegisterContent::Unknown;
for i in 0..=13 {
self[ppc750cl::GPR(i)] = RegisterContent::Unknown;
}
for i in 0..=13 {
self[ppc750cl::FPR(i)] = RegisterContent::Unknown;
}
}
// Mark potential input values.
// Subsequent flow analysis will "realize" that they are not actually inputs if
// they get overwritten with another value before getting read.
fn set_potential_inputs(&mut self) {
for g_reg in 3..=13 {
self[ppc750cl::GPR(g_reg)] = RegisterContent::InputRegister(g_reg);
}
for f_reg in 1..=13 {
self[ppc750cl::FPR(f_reg)] = RegisterContent::InputRegister(f_reg);
}
}
// If the there is no value, we can take the new known value.
// If there's a known value different than the new value, the content
// must is variable.
// Returns whether the current value was updated.
fn unify_values(current: &mut RegisterContent, new: &RegisterContent) -> bool {
if *current == *new {
false
} else if *current == RegisterContent::Unknown {
*current = *new;
true
} else if *current == RegisterContent::Variable {
// Already variable
false
} else {
*current = RegisterContent::Variable;
true
}
}
// Unify currently known register contents in a give situation with new
// information about the register contents in that situation.
// Currently unknown register contents can be filled, but if there are
// conflicting contents, we go back to unknown.
fn unify(&mut self, other: &RegisterState) -> bool {
let mut updated = false;
for i in 0..32 {
updated |= Self::unify_values(&mut self.gpr[i], &other.gpr[i]);
updated |= Self::unify_values(&mut self.fpr[i], &other.fpr[i]);
}
updated
}
}
impl Index<ppc750cl::GPR> for RegisterState {
type Output = RegisterContent;
fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { &self.gpr[gpr.0 as usize] }
}
impl IndexMut<ppc750cl::GPR> for RegisterState {
fn index_mut(&mut self, gpr: ppc750cl::GPR) -> &mut Self::Output {
&mut self.gpr[gpr.0 as usize]
}
}
impl Index<ppc750cl::FPR> for RegisterState {
type Output = RegisterContent;
fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { &self.fpr[fpr.0 as usize] }
}
impl IndexMut<ppc750cl::FPR> for RegisterState {
fn index_mut(&mut self, fpr: ppc750cl::FPR) -> &mut Self::Output {
&mut self.fpr[fpr.0 as usize]
}
}
fn execute_instruction(
registers: &mut RegisterState,
op: &ppc750cl::Opcode,
args: &ppc750cl::Arguments,
) {
use ppc750cl::{Argument, GPR, Opcode};
match (op, args[0], args[1], args[2]) {
(Opcode::Or, Argument::GPR(a), Argument::GPR(b), Argument::GPR(c)) => {
// Move is implemented as or with self for ints
if b == c {
registers[a] = registers[b];
} else {
registers[a] = RegisterContent::Unknown;
}
}
(Opcode::Fmr, Argument::FPR(a), Argument::FPR(b), _) => {
registers[a] = registers[b];
}
(Opcode::Addi, Argument::GPR(a), Argument::GPR(GPR(0)), Argument::Simm(c)) => {
// Load immidiate implemented as addi with addend = r0
// Let Addi with other addends fall through to the case which
// overwrites the destination
registers[a] = RegisterContent::IntConstant(c.0 as i32);
}
(Opcode::Bcctr, _, _, _) => {
// Called a function pointer, may have erased volatile registers
registers.clear_volatile();
}
(Opcode::B, _, _, _) => {
if get_branch_offset(args) == 0 {
// Call to another function
registers.clear_volatile();
}
}
(
Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | Opcode::Stfsu | Opcode::Stfdu,
_,
_,
Argument::GPR(rel),
) => {
// Storing with update, clear updated register (third arg)
registers[rel] = RegisterContent::Unknown;
}
(
Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | Opcode::Stfsux | Opcode::Stfdux,
_,
Argument::GPR(rel),
_,
) => {
// Storing indexed with update, clear updated register (second arg)
registers[rel] = RegisterContent::Unknown;
}
(Opcode::Lmw, Argument::GPR(target), _, _) => {
// `lmw` overwrites all registers from rd to r31.
for reg in target.0..31 {
registers[GPR(reg)] = RegisterContent::Unknown;
}
}
(_, Argument::GPR(a), _, _) => {
// Store instructions don't modify the GPR
if !is_store_instruction(*op) {
// Other operations which write to GPR a
registers[a] = RegisterContent::Unknown;
}
}
(_, Argument::FPR(a), _, _) => {
// Store instructions don't modify the FPR
if !is_store_instruction(*op) {
// Other operations which write to FPR a
registers[a] = RegisterContent::Unknown;
}
}
(_, _, _, _) => {}
}
}
fn get_branch_offset(args: &ppc750cl::Arguments) -> i32 {
for arg in args.iter() {
match arg {
ppc750cl::Argument::BranchDest(dest) => return dest.0 / 4,
ppc750cl::Argument::None => break,
_ => {}
}
}
0
}
#[derive(Debug, Default)]
struct PPCFlowAnalysisResult {
argument_contents: BTreeMap<(u64, u8), FlowAnalysisValue>,
}
impl PPCFlowAnalysisResult {
fn set_argument_value_at_address(
&mut self,
address: u64,
argument: u8,
value: FlowAnalysisValue,
) {
self.argument_contents.insert((address, argument), value);
}
fn new() -> Self { PPCFlowAnalysisResult { argument_contents: Default::default() } }
}
impl FlowAnalysisResult for PPCFlowAnalysisResult {
fn get_argument_value_at_address(
&self,
address: u64,
argument: u8,
) -> Option<&FlowAnalysisValue> {
self.argument_contents.get(&(address, argument))
}
}
fn clamp_text_length(s: String, max: usize) -> String {
if s.len() <= max { s } else { format!("{}", s.chars().take(max - 3).collect::<String>()) }
}
fn get_register_content_from_reloc(
reloc: &Relocation,
obj: &Object,
op: ppc750cl::Opcode,
) -> RegisterContent {
if let Some(bytes) = obj.symbol_data(reloc.target_symbol) {
match guess_data_type_from_load_store_inst_op(op) {
Some(DataType::Float) => {
RegisterContent::FloatConstant(RawFloat(match obj.endianness {
object::Endianness::Little => {
f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4]))
}
object::Endianness::Big => {
f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4]))
}
}))
}
Some(DataType::Double) => {
RegisterContent::DoubleConstant(RawDouble(match obj.endianness {
object::Endianness::Little => {
f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8]))
}
object::Endianness::Big => {
f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8]))
}
}))
}
_ => RegisterContent::Symbol(reloc.target_symbol),
}
} else {
RegisterContent::Symbol(reloc.target_symbol)
}
}
// Executing op with args at cur_address, update current_state with symbols that
// come from relocations. That is, references to globals, floating point
// constants, string constants, etc.
fn fill_registers_from_relocation(
reloc: &Relocation,
current_state: &mut RegisterState,
obj: &Object,
op: ppc750cl::Opcode,
args: &ppc750cl::Arguments,
) {
// Only update the register state for loads. We may store to a reloc
// address but that doesn't update register contents.
if !is_store_instruction(op) {
match (op, args[0]) {
// Everything else is a load of some sort
(_, ppc750cl::Argument::GPR(gpr)) => {
current_state[gpr] = get_register_content_from_reloc(reloc, obj, op);
}
(_, ppc750cl::Argument::FPR(fpr)) => {
current_state[fpr] = get_register_content_from_reloc(reloc, obj, op);
}
_ => {}
}
}
}
// Special helper fragments generated by MWCC.
// See: https://github.com/encounter/decomp-toolkit/blob/main/src/analysis/pass.rs
const SLEDS: [&str; 6] = ["_savefpr_", "_restfpr_", "_savegpr_", "_restgpr_", "_savev", "_restv"];
fn is_sled_function(name: &str) -> bool { SLEDS.iter().any(|sled| name.starts_with(sled)) }
pub fn ppc_data_flow_analysis(
obj: &Object,
func_symbol: &Symbol,
code: &[u8],
relocations: &[Relocation],
) -> Box<dyn FlowAnalysisResult> {
use alloc::collections::VecDeque;
use ppc750cl::InsIter;
let instructions = InsIter::new(code, func_symbol.address as u32)
.map(|(_addr, ins)| (ins.op, ins.basic().args))
.collect_vec();
let func_address = func_symbol.address;
// Get initial register values from function parameters
let mut initial_register_state = RegisterState::new();
initial_register_state.set_potential_inputs();
let mut execution_queue = VecDeque::<(usize, RegisterState)>::new();
execution_queue.push_back((0, initial_register_state));
// Execute the instructions against abstract data
let mut failsafe_counter = 0;
let mut taken_branches = BTreeSet::<(usize, RegisterState)>::new();
let mut register_state_at = Vec::<RegisterState>::new();
let mut completed_first_pass = false;
register_state_at.resize_with(instructions.len(), RegisterState::new);
while let Some((mut index, mut current_state)) = execution_queue.pop_front() {
while let Some((op, args)) = instructions.get(index) {
// Record the state at this index
// If recording does not result in any changes to the known values
// we're done, because the subsequent values are a function of the
// current values so we'll get the same result as the last time
// we went down this path.
// Don't break out if we haven't even completed the first pass
// through the function though.
if !register_state_at[index].unify(&current_state) && completed_first_pass {
break;
}
// Get symbol used in this instruction
let cur_addr = (func_address as u32) + ((index * 4) as u32);
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
// Is this a branch to a compiler generated helper? These helpers
// do not trash registers like normal function calls, so we don't
// want to treat this as normal execution.
let symbol = reloc.and_then(|r| obj.symbols.get(r.target_symbol));
let is_sled_invocation = symbol.is_some_and(|x| is_sled_function(&x.name));
// Execute the instruction to update the state
// Since sled invocations are only used to save / restore registers
// as part of prelude / cleanup in a function call we don't have to
// do any execution for them.
if !is_sled_invocation {
execute_instruction(&mut current_state, op, args);
}
// Fill in register state coming from relocations at this line. This
// handles references to global variables, floating point constants,
// etc.
if let Some(reloc) = reloc {
fill_registers_from_relocation(reloc, &mut current_state, obj, *op, args);
}
// Add conditional branches to execution queue
// Only take a given (address, register state) combination once. If
// the known register state is different we have to take the branch
// again to stabilize the known values for backwards branches.
if op == &ppc750cl::Opcode::Bc {
let branch_state = (index, current_state.clone());
if !taken_branches.contains(&branch_state) {
let offset = get_branch_offset(args);
let target_index = ((index as i32) + offset) as usize;
execution_queue.push_back((target_index, current_state.clone()));
taken_branches.insert(branch_state);
// We should never hit this case, but avoid getting stuck in
// an infinite loop if we hit some kind of bad behavior.
failsafe_counter += 1;
if failsafe_counter > 256 {
//println!("Analysis of {} failed to stabilize", func_symbol.name);
return Box::new(PPCFlowAnalysisResult::new());
}
}
}
// Update index
if op == &ppc750cl::Opcode::B {
// Unconditional branch
let offset = get_branch_offset(args);
if offset > 0 {
// Jump table or branch to over else clause.
index += offset as usize;
} else if offset == 0 {
// Function call with relocation. We'll return to
// the next instruction.
index += 1;
} else {
// Unconditional branch (E.g.: loop { ... })
// Also some compilations of loops put the conditional at
// the end and B to it for the check of the first iteration.
let branch_state = (index, current_state.clone());
if taken_branches.contains(&branch_state) {
break;
}
taken_branches.insert(branch_state);
index = ((index as i32) + offset) as usize;
}
} else {
// Normal execution of next instruction
index += 1;
}
}
// Mark that we've completed at least one pass over the function, at
// this point we can break out if the code we're running doesn't change
// any register outcomes.
completed_first_pass = true;
}
// Store the relevant data flow values for simplified instructions
generate_flow_analysis_result(obj, func_address, code, register_state_at, relocations)
}
fn get_string_data(obj: &Object, symbol_index: usize, offset: Simm) -> Option<&str> {
if let Some(sym) = obj.symbols.get(symbol_index) {
if sym.name.starts_with("@stringBase") && offset.0 != 0 {
if let Some(data) = obj.symbol_data(symbol_index) {
let bytes = &data[offset.0 as usize..];
if let Ok(Ok(str)) = CStr::from_bytes_until_nul(bytes).map(|x| x.to_str()) {
return Some(str);
}
}
}
}
None
}
// Write the relevant part of the flow analysis out into the FlowAnalysisResult
// the rest of the application will use to query results of the flow analysis.
// Flow analysis will compute the known contents of every register at every
// line, but we only need to record the values of registers that are actually
// referenced at each line.
fn generate_flow_analysis_result(
obj: &Object,
base_address: u64,
code: &[u8],
register_state_at: Vec<RegisterState>,
relocations: &[Relocation],
) -> Box<PPCFlowAnalysisResult> {
use ppc750cl::{Argument, InsIter};
let mut analysis_result = PPCFlowAnalysisResult::new();
let default_register_state = RegisterState::new();
for (addr, ins) in InsIter::new(code, 0) {
let ins_address = base_address + (addr as u64);
let index = addr / 4;
let ppc750cl::ParsedIns { mnemonic: _, args } = ins.simplified();
// If we're already showing relocations on a line don't also show data flow
let reloc = relocations.iter().find(|r| (r.address & !3) == ins_address);
// Special case to show float and double constants on the line where
// they are being loaded.
// We need to do this before we break out on showing relocations in the
// subsequent if statement.
if let (ppc750cl::Opcode::Lfs | ppc750cl::Opcode::Lfd, Some(reloc)) = (ins.op, reloc) {
let content = get_register_content_from_reloc(reloc, obj, ins.op);
if matches!(
content,
RegisterContent::FloatConstant(_) | RegisterContent::DoubleConstant(_)
) {
analysis_result.set_argument_value_at_address(
ins_address,
1,
FlowAnalysisValue::Text(content.to_string()),
);
// Don't need to show any other data flow if we're showing that
continue;
}
}
// Special case to show string constants on the line where they are
// being indexed to. This will typically be "addi t, stringbase, offset"
let registers = register_state_at.get(index as usize).unwrap_or(&default_register_state);
if let (ppc750cl::Opcode::Addi, Argument::GPR(rel), Argument::Simm(offset)) =
(ins.op, args[1], args[2])
{
if let RegisterContent::Symbol(sym_index) = registers[rel] {
if let Some(str) = get_string_data(obj, sym_index, offset) {
// Show the string constant in the analysis result
let formatted = format!("\"{str}\"");
analysis_result.set_argument_value_at_address(
ins_address,
2,
FlowAnalysisValue::Text(clamp_text_length(formatted, 20)),
);
// Don't continue, we want to show the stringbase value as well
}
}
}
let is_store = is_store_instruction(ins.op);
for (arg_index, arg) in args.into_iter().enumerate() {
// Hacky shorthand for determining which arguments are sources,
// We only want to show data flow for source registers, not target
// registers. Technically there are some non-"st_" operations which
// read from their first argument but they're rare.
if (arg_index == 0) && !is_store {
continue;
}
let content = match arg {
Argument::GPR(gpr) => Some(registers[gpr]),
Argument::FPR(fpr) => Some(registers[fpr]),
_ => None,
};
let analysis_value = match content {
Some(RegisterContent::Symbol(s)) => {
if reloc.is_none() {
// Only symbols if there isn't already a relocation, because
// code other than the data flow analysis will be showing
// the symbol for a relocation on the line it is for. If we
// also showed it as data flow analysis value we would be
// showing redundant information.
obj.symbols.get(s).map(|sym| {
FlowAnalysisValue::Text(clamp_text_length(
sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(),
20,
))
})
} else {
None
}
}
Some(RegisterContent::InputRegister(reg)) => {
let reg_name = match arg {
Argument::GPR(_) => format!("in_r{reg}"),
Argument::FPR(_) => format!("in_f{reg}"),
_ => panic!("Register content should only be in a register"),
};
Some(FlowAnalysisValue::Text(reg_name))
}
Some(RegisterContent::Unknown) | Some(RegisterContent::Variable) => None,
Some(value) => Some(FlowAnalysisValue::Text(format!("{value}"))),
None => None,
};
if let Some(analysis_value) = analysis_value {
analysis_result.set_argument_value_at_address(
ins_address,
arg_index as u8,
analysis_value,
);
}
}
}
Box::new(analysis_result)
}

View File

@ -1,4 +1,5 @@
use alloc::{
boxed::Box,
collections::{BTreeMap, BTreeSet},
string::{String, ToString},
vec,
@ -18,11 +19,13 @@ use crate::{
display::{ContextItem, HoverItem, HoverItemColor, InstructionPart, SymbolNavigationKind},
},
obj::{
InstructionRef, Object, Relocation, RelocationFlags, ResolvedInstructionRef,
ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet,
FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet,
},
};
mod flow_analysis;
// Relative relocation, can be Simm, Offset or BranchDest
fn is_relative_arg(arg: &ppc750cl::Argument) -> bool {
matches!(
@ -157,6 +160,7 @@ impl Arch for ArchPpc {
Ok(())
}
// Could be replaced by data_flow_analysis once that feature stabilizes
fn generate_pooled_relocations(
&self,
address: u64,
@ -167,6 +171,16 @@ impl Arch for ArchPpc {
generate_fake_pool_relocations_for_function(address, code, relocations, symbols)
}
fn data_flow_analysis(
&self,
obj: &Object,
symbol: &Symbol,
code: &[u8],
relocations: &[Relocation],
) -> Option<Box<dyn FlowAnalysisResult>> {
Some(flow_analysis::ppc_data_flow_analysis(obj, symbol, code, relocations))
}
fn implcit_addend(
&self,
_file: &object::File<'_>,
@ -225,7 +239,7 @@ impl Arch for ArchPpc {
return Some(DataType::String);
}
let opcode = ppc750cl::Opcode::from(resolved.ins_ref.opcode as u8);
if let Some(ty) = guess_data_type_from_load_store_inst_op(opcode) {
if let Some(ty) = flow_analysis::guess_data_type_from_load_store_inst_op(opcode) {
// Numeric type.
return Some(ty);
}
@ -501,25 +515,6 @@ fn make_symbol_ref(symbol: &object::Symbol) -> Result<ExtabSymbolRef> {
Ok(ExtabSymbolRef { original_index: symbol.index().0 - 1, name, demangled_name })
}
fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option<DataType> {
use ppc750cl::Opcode;
match inst_op {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
}
#[derive(Debug)]
struct PoolReference {
addr_src_gpr: ppc750cl::GPR,
@ -536,7 +531,7 @@ fn get_pool_reference_for_inst(
) -> Option<PoolReference> {
use ppc750cl::{Argument, Opcode};
let args = &simplified.args;
if guess_data_type_from_load_store_inst_op(opcode).is_some() {
if flow_analysis::guess_data_type_from_load_store_inst_op(opcode).is_some() {
match (args[1], args[2]) {
(Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => {
// e.g. lwz. Immediate offset.
@ -668,7 +663,7 @@ fn make_fake_pool_reloc(
// and returns a Vec of "fake pool relocations" that simulate what a relocation for that instruction
// would look like if data hadn't been pooled.
// This method tries to follow the function's proper control flow. It keeps track of a queue of
// states it hasn't traversed yet, where each state holds an instruction address and a HashMap of
// states it hasn't traversed yet, where each state holds an instruction address and a map of
// which registers hold which pool relocations at that point.
// When a conditional or unconditional branch is encountered, the destination of the branch is added
// to the queue. Conditional branches will traverse both the path where the branch is taken and the

View File

@ -14,8 +14,9 @@ use regex::Regex;
use crate::{
diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff},
obj::{
InstructionArg, InstructionArgValue, Object, ParsedInstruction, ResolvedInstructionRef,
ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag, SymbolKind,
FlowAnalysisValue, InstructionArg, InstructionArgValue, Object, ParsedInstruction,
ResolvedInstructionRef, ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag,
SymbolKind,
},
};
@ -47,11 +48,12 @@ pub enum DiffText<'a> {
pub enum DiffTextColor {
#[default]
Normal, // Grey
Dim, // Dark grey
Bright, // White
Replace, // Blue
Delete, // Red
Insert, // Green
Dim, // Dark grey
Bright, // White
DataFlow, // Light blue
Replace, // Blue
Delete, // Red
Insert, // Green
Rotating(u8),
}
@ -186,6 +188,11 @@ pub fn display_row(
}
let mut arg_idx = 0;
let mut displayed_relocation = false;
let analysis_result = if diff_config.show_data_flow {
obj.flow_analysis_results.get(&resolved.symbol.address)
} else {
None
};
obj.arch.display_instruction(resolved, diff_config, &mut |part| match part {
InstructionPart::Basic(text) => {
if text.chars().all(|c| c == ' ') {
@ -208,15 +215,30 @@ pub fn display_row(
if arg == InstructionArg::Reloc {
displayed_relocation = true;
}
match (arg, resolved.ins_ref.branch_dest) {
(InstructionArg::Value(value), _) => cb(DiffTextSegment {
text: DiffText::Argument(value),
color: diff_index
let data_flow_value =
analysis_result.and_then(|result|
result.as_ref().get_argument_value_at_address(
ins_ref.address, (arg_idx - 1) as u8));
match (arg, data_flow_value, resolved.ins_ref.branch_dest) {
// If we have a flow analysis result, always use that over anything else.
(InstructionArg::Value(_) | InstructionArg::Reloc, Some(FlowAnalysisValue::Text(text)), _) => {
cb(DiffTextSegment {
text: DiffText::Argument(InstructionArgValue::Opaque(Cow::Borrowed(text))),
color: DiffTextColor::DataFlow,
pad_to: 0,
})
},
(InstructionArg::Value(value), None, _) => {
let color = diff_index
.get()
.map_or(base_color, |i| DiffTextColor::Rotating(i as u8)),
pad_to: 0,
}),
(InstructionArg::Reloc, None) => {
.map_or(base_color, |i| DiffTextColor::Rotating(i as u8));
cb(DiffTextSegment {
text: DiffText::Argument(value),
color,
pad_to: 0,
})
},
(InstructionArg::Reloc, _, None) => {
let resolved = resolved.relocation.unwrap();
let color = diff_index
.get()
@ -235,9 +257,9 @@ pub fn display_row(
}
Ok(())
}
(InstructionArg::BranchDest(dest), _) |
(InstructionArg::BranchDest(dest), _, _) |
// If the relocation was resolved to a branch destination, emit that instead.
(InstructionArg::Reloc, Some(dest)) => {
(InstructionArg::Reloc, _, Some(dest)) => {
if let Some(addr) = dest.checked_sub(resolved.symbol.address) {
cb(DiffTextSegment {
text: DiffText::BranchDest(addr),

View File

@ -233,6 +233,19 @@ pub enum SymbolKind {
Section,
}
#[derive(Debug)]
pub enum FlowAnalysisValue {
Text(String),
}
pub trait FlowAnalysisResult: core::fmt::Debug + Send {
fn get_argument_value_at_address(
&self,
address: u64,
argument: u8,
) -> Option<&FlowAnalysisValue>;
}
#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
pub struct Symbol {
pub name: String,
@ -260,6 +273,7 @@ pub struct Object {
pub path: Option<std::path::PathBuf>,
#[cfg(feature = "std")]
pub timestamp: Option<filetime::FileTime>,
pub flow_analysis_results: BTreeMap<u64, Box<dyn FlowAnalysisResult>>,
}
impl Default for Object {
@ -274,6 +288,7 @@ impl Default for Object {
path: None,
#[cfg(feature = "std")]
timestamp: None,
flow_analysis_results: BTreeMap::<u64, Box<dyn FlowAnalysisResult>>::new(),
}
}
}
@ -312,6 +327,8 @@ impl Object {
pub fn symbol_by_name(&self, name: &str) -> Option<usize> {
self.symbols.iter().position(|symbol| symbol.section.is_some() && symbol.name == name)
}
pub fn has_flow_analysis_result(&self) -> bool { !self.flow_analysis_results.is_empty() }
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]

View File

@ -432,17 +432,18 @@ fn map_relocations(
Ok(())
}
fn calculate_pooled_relocations(
arch: &dyn Arch,
sections: &mut [Section],
symbols: &[Symbol],
) -> Result<()> {
for (section_index, section) in sections.iter_mut().enumerate() {
fn perform_data_flow_analysis(obj: &mut Object, config: &DiffObjConfig) -> Result<()> {
// If neither of these settings are on, no flow analysis to perform
if !config.analyze_data_flow && !config.ppc_calculate_pool_relocations {
return Ok(());
}
let mut generated_relocations = Vec::<(usize, Vec<Relocation>)>::new();
for (section_index, section) in obj.sections.iter().enumerate() {
if section.kind != SectionKind::Code {
continue;
}
let mut fake_pool_relocs = Vec::new();
for symbol in symbols {
for symbol in obj.symbols.iter() {
if symbol.section != Some(section_index) {
continue;
}
@ -457,14 +458,32 @@ fn calculate_pooled_relocations(
symbol.address + symbol.size
)
})?;
fake_pool_relocs.append(&mut arch.generate_pooled_relocations(
symbol.address,
code,
&section.relocations,
symbols,
));
// Optional pooled relocation computation
// Long view: This could be replaced by the full data flow analysis
// once that feature has stabilized.
if config.ppc_calculate_pool_relocations {
let relocations = obj.arch.generate_pooled_relocations(
symbol.address,
code,
&section.relocations,
&obj.symbols,
);
generated_relocations.push((section_index, relocations));
}
// Optional full data flow analysis
if config.analyze_data_flow {
obj.arch.data_flow_analysis(obj, symbol, code, &section.relocations).and_then(
|flow_result| obj.flow_analysis_results.insert(symbol.address, flow_result),
);
}
}
section.relocations.append(&mut fake_pool_relocs);
}
for (section_index, mut relocations) in generated_relocations {
obj.sections[section_index].relocations.append(&mut relocations);
}
for section in obj.sections.iter_mut() {
section.relocations.sort_by_key(|r| r.address);
}
Ok(())
@ -865,15 +884,12 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
let (mut symbols, symbol_indices) =
map_symbols(arch.as_ref(), &obj_file, &sections, &section_indices, split_meta.as_ref())?;
map_relocations(arch.as_ref(), &obj_file, &mut sections, &section_indices, &symbol_indices)?;
if config.ppc_calculate_pool_relocations {
calculate_pooled_relocations(arch.as_ref(), &mut sections, &symbols)?;
}
parse_line_info(&obj_file, &mut sections, &section_indices, data)?;
if config.combine_data_sections || config.combine_text_sections {
combine_sections(&mut sections, &mut symbols, config)?;
}
arch.post_init(&sections, &symbols);
Ok(Object {
let mut obj = Object {
arch,
endianness: obj_file.endianness(),
symbols,
@ -883,7 +899,14 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
path: None,
#[cfg(feature = "std")]
timestamp: None,
})
flow_analysis_results: Default::default(),
};
// Need to construct the obj first so that we have a convinient package to
// pass to flow analysis. Then the flow analysis will mutate obj adding
// additional data to it.
perform_data_flow_analysis(&mut obj, config)?;
Ok(obj)
}
#[cfg(feature = "std")]

View File

@ -59,3 +59,33 @@ pub fn align_u64_to(len: u64, align: u64) -> u64 { len + ((align - (len % align)
pub fn align_data_slice_to(data: &mut Vec<u8>, align: u64) {
data.resize(align_u64_to(data.len() as u64, align) as usize, 0);
}
// Float where we specifically care about comparing the raw bits rather than
// caring about IEEE semantics.
#[derive(Copy, Clone, Debug)]
pub struct RawFloat(pub f32);
impl PartialEq for RawFloat {
fn eq(&self, other: &Self) -> bool { self.0.to_bits() == other.0.to_bits() }
}
impl Eq for RawFloat {}
impl Ord for RawFloat {
fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.to_bits().cmp(&other.0.to_bits()) }
}
impl PartialOrd for RawFloat {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { Some(self.cmp(other)) }
}
// Double where we specifically care about comparing the raw bits rather than
// caring about IEEE semantics.
#[derive(Copy, Clone, Debug)]
pub struct RawDouble(pub f64);
impl PartialEq for RawDouble {
fn eq(&self, other: &Self) -> bool { self.0.to_bits() == other.0.to_bits() }
}
impl Eq for RawDouble {}
impl Ord for RawDouble {
fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.to_bits().cmp(&other.0.to_bits()) }
}
impl PartialOrd for RawDouble {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { Some(self.cmp(other)) }
}

View File

@ -1954,4 +1954,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -3826,4 +3826,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -1490,4 +1490,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -548,4 +548,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -581,4 +581,5 @@ Object {
),
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -207,4 +207,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -1574,4 +1574,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -311,4 +311,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -160,4 +160,5 @@ Object {
split_meta: None,
path: None,
timestamp: None,
flow_analysis_results: {},
}

View File

@ -23,6 +23,8 @@ pub struct Appearance {
#[serde(skip)]
pub highlight_color: Color32, // WHITE
#[serde(skip)]
pub dataflow_color: Color32, //
#[serde(skip)]
pub replace_color: Color32, // LIGHT_BLUE
#[serde(skip)]
pub insert_color: Color32, // GREEN
@ -61,6 +63,7 @@ impl Default for Appearance {
emphasized_text_color: Color32::LIGHT_GRAY,
deemphasized_text_color: Color32::DARK_GRAY,
highlight_color: Color32::WHITE,
dataflow_color: Color32::from_rgb(0, 128, 128),
replace_color: Color32::LIGHT_BLUE,
insert_color: Color32::GREEN,
delete_color: Color32::from_rgb(200, 40, 41),
@ -104,6 +107,7 @@ impl Appearance {
self.emphasized_text_color = Color32::LIGHT_GRAY;
self.deemphasized_text_color = Color32::DARK_GRAY;
self.highlight_color = Color32::WHITE;
self.dataflow_color = Color32::from_rgb(0, 128, 128);
self.replace_color = Color32::LIGHT_BLUE;
self.insert_color = Color32::GREEN;
self.delete_color = Color32::from_rgb(200, 40, 41);
@ -114,6 +118,7 @@ impl Appearance {
self.emphasized_text_color = Color32::DARK_GRAY;
self.deemphasized_text_color = Color32::LIGHT_GRAY;
self.highlight_color = Color32::BLACK;
self.dataflow_color = Color32::from_rgb(0, 128, 128);
self.replace_color = Color32::DARK_BLUE;
self.insert_color = Color32::DARK_GREEN;
self.delete_color = Color32::from_rgb(200, 40, 41);

View File

@ -281,6 +281,24 @@ pub fn diff_view_ui(
})
});
}
// Only need to check the first Object. Technically the first could not have a flow analysis
// result while the second does but we don't want to waste space on two separate checkboxes.
if state.current_view == View::FunctionDiff
&& result
.first_obj
.as_ref()
.is_some_and(|(first, _)| first.has_flow_analysis_result())
{
let mut value = diff_config.show_data_flow;
if ui
.checkbox(&mut value, "Show data flow")
.on_hover_text("Show data flow analysis results in place of register names")
.clicked()
{
ret = Some(DiffViewAction::SetShowDataFlow(value));
}
}
} else if column == 1 {
// Right column

View File

@ -174,6 +174,7 @@ fn diff_text_ui(
DiffTextColor::Normal => appearance.text_color,
DiffTextColor::Dim => appearance.deemphasized_text_color,
DiffTextColor::Bright => appearance.emphasized_text_color,
DiffTextColor::DataFlow => appearance.dataflow_color,
DiffTextColor::Replace => appearance.replace_color,
DiffTextColor::Delete => appearance.delete_color,
DiffTextColor::Insert => appearance.insert_color,

View File

@ -79,6 +79,8 @@ pub enum DiffViewAction {
SetMapping(usize, usize),
/// Set the show_mapped_symbols flag
SetShowMappedSymbols(bool),
/// Set the show_data_flow flag
SetShowDataFlow(bool),
}
#[derive(Debug, Clone, Default, Eq, PartialEq)]
@ -350,6 +352,12 @@ impl DiffViewState {
DiffViewAction::SetShowMappedSymbols(value) => {
self.symbol_state.show_mapped_symbols = value;
}
DiffViewAction::SetShowDataFlow(value) => {
let Ok(mut state) = state.write() else {
return;
};
state.config.diff_obj_config.show_data_flow = value;
}
}
}

View File

@ -401,6 +401,7 @@ impl From<diff::display::DiffTextColor> for DiffTextColor {
diff::display::DiffTextColor::Replace => DiffTextColor::Replace,
diff::display::DiffTextColor::Delete => DiffTextColor::Delete,
diff::display::DiffTextColor::Insert => DiffTextColor::Insert,
diff::display::DiffTextColor::DataFlow => DiffTextColor::DataFlow,
diff::display::DiffTextColor::Rotating(v) => DiffTextColor::Rotating(v),
}
}

View File

@ -201,6 +201,7 @@ interface display {
dim,
bright,
replace,
data-flow,
delete,
insert,
rotating(u8),