Restructure, implement naive data ref detection
This commit is contained in:
parent
7ddae6e170
commit
7b63bfa533
|
@ -132,11 +132,7 @@ fn disasm_iter(
|
|||
size: Option<u32>,
|
||||
) -> PyResult<DisasmIterator> {
|
||||
let left = match size {
|
||||
None => code
|
||||
.as_bytes()
|
||||
.len()
|
||||
.checked_sub(offset as usize)
|
||||
.unwrap_or(0),
|
||||
None => code.as_bytes().len().saturating_sub(offset as usize),
|
||||
Some(v) => v as usize,
|
||||
};
|
||||
Ok(DisasmIterator {
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::{Debug, Display, Formatter};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use itertools::Itertools;
|
||||
use petgraph::algo::dominators::Dominators;
|
||||
use petgraph::graph::{DefaultIx, NodeIndex};
|
||||
use petgraph::Graph;
|
||||
|
||||
use ppc750cl::{Ins, Opcode};
|
||||
|
||||
use crate::slices::{BasicSlices, CodeIdx};
|
||||
|
||||
/// A run of consecutive instructions that forms one node of the control-flow graph.
#[derive(Default)]
|
||||
pub struct BasicBlock<'a> {
|
||||
/// Half-open range of code indexes (instruction address / 4) covered by the block.
pub range: Range<CodeIdx>,
|
||||
/// The block's instructions, borrowed from the complete instruction listing.
pub code: &'a [Ins],
|
||||
/// Addresses found by the naive data-reference detection, keyed by the code index
/// of the `addi` instruction that completes the address.
pub data_refs: HashMap<CodeIdx, u32>,
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for BasicBlock<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.range == other.range
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: equality only compares `range`, whose integer bounds form a total equivalence.
impl<'a> Eq for BasicBlock<'a> {}
|
||||
|
||||
impl<'a> Hash for BasicBlock<'a> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.range.hash(state)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BasicBlock<'a> {
|
||||
pub fn from_code_slice(range: Range<CodeIdx>, complete_code: &'a [Ins]) -> BasicBlock {
|
||||
let start_idx = complete_code.first().unwrap().addr / 4;
|
||||
assert!(start_idx <= range.start);
|
||||
let offset = (range.start - start_idx) as usize;
|
||||
let code = &complete_code[offset..(offset + (range.len() as usize))];
|
||||
BasicBlock {
|
||||
range,
|
||||
code,
|
||||
data_refs: Self::detect_data_refs(code),
|
||||
}
|
||||
}
|
||||
|
||||
/// Very simple algorithm to detect data references.
|
||||
fn detect_data_refs(code: &[Ins]) -> HashMap<CodeIdx, u32> {
|
||||
let mut defs = HashMap::<u8, u16>::new();
|
||||
let mut data_refs = HashMap::<CodeIdx, u32>::new();
|
||||
for ins in code {
|
||||
match ins.op {
|
||||
Opcode::Addis => {
|
||||
if ins.a() == 0 {
|
||||
// lis
|
||||
defs.insert(ins.d(), ins.uimm());
|
||||
} else {
|
||||
defs.remove(&ins.d());
|
||||
}
|
||||
}
|
||||
Opcode::Addi => {
|
||||
if let Some(hi) = defs.get(&ins.a()) {
|
||||
data_refs.insert(ins.addr / 4, ((*hi as u32) << 16) + (ins.uimm() as u32));
|
||||
}
|
||||
defs.remove(&ins.d());
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
data_refs
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:0>#8x}", self.range.start * 4)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Debug for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"// {:0>#8x}..{:0>#8x}",
|
||||
self.range.start * 4,
|
||||
self.range.end * 4
|
||||
)?;
|
||||
for ins in self.code {
|
||||
writeln!(f, "{}", ins.to_string())?;
|
||||
if let Some(addr) = self.data_refs.get(&(ins.addr / 4)) {
|
||||
writeln!(f, " ref: {:0>#8x}", addr)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A control-flow graph of a function.
///
/// Nodes are basic blocks; an edge means execution can continue from the source
/// block into the destination block. Edges carry no weight.
|
||||
pub struct FlowGraph<'a> {
|
||||
/// The directed graph of basic blocks.
pub graph: Graph<BasicBlock<'a>, ()>,
|
||||
/// Node of the block with the lowest start index, used as the dominator root.
pub root_idx: NodeIndex,
|
||||
}
|
||||
|
||||
impl<'a> FlowGraph<'a> {
|
||||
/// Creates a control-flow graph from basic slices.
|
||||
pub fn from_basic_slices(slices: &BasicSlices, code: &'a [Ins]) -> Self {
|
||||
assert!(!code.is_empty(), "Attempt to create empty flow graph");
|
||||
// Walk set cuts and create basic blocks.
|
||||
let mut graph = Graph::new();
|
||||
let mut node_by_addr = BTreeMap::<u32, NodeIndex<DefaultIx>>::new();
|
||||
let mut block_start: CodeIdx = code[0].addr / 4;
|
||||
for cut in &slices.cuts {
|
||||
if *cut > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..*cut, code)),
|
||||
);
|
||||
}
|
||||
block_start = *cut;
|
||||
}
|
||||
// Last block.
|
||||
let func_end: CodeIdx = (code.last().unwrap().addr / 4) + 1;
|
||||
if func_end > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..func_end, code)),
|
||||
);
|
||||
}
|
||||
// Walk set of branches and connect graph.
|
||||
for branch in &slices.branches {
|
||||
let src_node_idx = match node_by_addr.range(..branch.0 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[src_node_idx].range.contains(&branch.0));
|
||||
let dst_node_idx = match node_by_addr.range(..branch.1 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[dst_node_idx].range.contains(&branch.1));
|
||||
graph.add_edge(src_node_idx, dst_node_idx, ());
|
||||
}
|
||||
// Walk blocks and re-connect nodes that were split off.
|
||||
for (src_node_idx, dst_node_idx) in node_by_addr.values().tuple_windows::<(_, _)>() {
|
||||
// Get pairs of two blocks as a sliding window.
|
||||
let src_block: &BasicBlock = &graph[*src_node_idx];
|
||||
let dst_block: &BasicBlock = &graph[*dst_node_idx];
|
||||
assert_eq!(src_block.range.end, dst_block.range.start);
|
||||
// Get last instruction of left block.
|
||||
// Unless it's an unconditional branch, we can connect the blocks.
|
||||
let last_ins = &src_block.code.last().unwrap();
|
||||
if last_ins.code == Opcode::BLR
|
||||
|| (last_ins.op == Opcode::B && last_ins.bo() == 0b10100)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Execution can continue past the last instruction of a block,
|
||||
// so re-connect two blocks that were split off.
|
||||
if !graph.contains_edge(*src_node_idx, *dst_node_idx) {
|
||||
graph.add_edge(*src_node_idx, *dst_node_idx, ());
|
||||
}
|
||||
}
|
||||
Self {
|
||||
graph,
|
||||
root_idx: *node_by_addr.index(node_by_addr.keys().next().unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dominators(&self) -> Dominators<NodeIndex> {
|
||||
petgraph::algo::dominators::simple_fast(&self.graph, self.root_idx)
|
||||
}
|
||||
}
|
|
@ -1,14 +1,13 @@
|
|||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::fmt::{Debug, Display, Formatter};
|
||||
use std::ops::Range;
|
||||
|
||||
use clap::clap_app;
|
||||
use petgraph::dot::{Config as DotConfig, Dot};
|
||||
use petgraph::graph::{DefaultIx, NodeIndex};
|
||||
use petgraph::Graph;
|
||||
|
||||
use itertools::Itertools;
|
||||
use ppc750cl::{disasm_iter, Ins, Opcode};
|
||||
use ppc750cl::{disasm_iter, Ins};
|
||||
|
||||
pub mod flow;
|
||||
pub mod slices;
|
||||
|
||||
use crate::flow::FlowGraph;
|
||||
use crate::slices::BasicSlices;
|
||||
|
||||
fn main() {
|
||||
let matches = clap_app!(myapp =>
|
||||
|
@ -28,11 +27,11 @@ fn main() {
|
|||
// Create control flow graph.
|
||||
let ins_list: Vec<Ins> = disasm_iter(&bytes, addr).collect();
|
||||
let basic_slices = BasicSlices::from_code(&ins_list);
|
||||
let graph = basic_slices.to_control_flow_graph(&ins_list);
|
||||
let graph = FlowGraph::from_basic_slices(&basic_slices, &ins_list);
|
||||
|
||||
// Output graphviz.
|
||||
let graphviz = Dot::with_config(
|
||||
&graph,
|
||||
&graph.graph,
|
||||
&[DotConfig::EdgeNoLabel, DotConfig::GraphContentOnly],
|
||||
);
|
||||
println!(
|
||||
|
@ -45,166 +44,3 @@ fn main() {
|
|||
graphviz
|
||||
);
|
||||
}
|
||||
|
||||
/// The instruction address divided by four.
///
/// Used as the unit for block ranges, cuts and branch endpoints.
|
||||
type CodeIdx = u32;
|
||||
|
||||
/// Block boundaries and branch edges collected from a linear instruction listing.
struct BasicSlices {
|
||||
/// The indexes separating instructions into basic blocks.
|
||||
/// Used to create a list of consecutive basic blocks.
|
||||
cuts: BTreeSet<CodeIdx>,
|
||||
/// The possible branches from one instruction to another,
/// stored as `(source index, destination index)`.
|
||||
/// Used to link together basic blocks into a directed graph.
|
||||
branches: HashSet<(CodeIdx, CodeIdx)>,
|
||||
}
|
||||
|
||||
impl BasicSlices {
|
||||
/// Computes basic slices from instructions.
|
||||
fn from_code(code: &[Ins]) -> Self {
|
||||
let mut cuts = BTreeSet::<CodeIdx>::new();
|
||||
let mut branches = HashSet::<(CodeIdx, CodeIdx)>::new();
|
||||
for ins in code {
|
||||
let cur_index = ins.addr / 4;
|
||||
let is_control_flow_ins = match ins.op {
|
||||
// Direct branches are control flow instructions if they don't save the link register.
|
||||
// If they do, we encountered a function call.
|
||||
Opcode::B | Opcode::Bc => ins.lk() == 0,
|
||||
// Switch table
|
||||
Opcode::Bcctr => panic!("jump tables not supported yet"),
|
||||
_ => false,
|
||||
};
|
||||
if !is_control_flow_ins {
|
||||
continue;
|
||||
}
|
||||
// We encountered some kind of control flow instruction.
|
||||
if ins.code != Opcode::BLR {
|
||||
// There's a possibility that branch can be taken.
|
||||
// Branch destinations are always the first instruction of a block.
|
||||
// Thus, we also found the end of another block.
|
||||
let new_index = ins.branch_dest().unwrap() / 4;
|
||||
cuts.insert(new_index);
|
||||
branches.insert((cur_index, new_index));
|
||||
}
|
||||
if is_conditional_branch(ins) {
|
||||
// There's a possibility that branch is not taken.
|
||||
// End block anyways.
|
||||
cuts.insert(cur_index + 1);
|
||||
branches.insert((cur_index, cur_index + 1));
|
||||
}
|
||||
}
|
||||
Self { cuts, branches }
|
||||
}
|
||||
|
||||
/// Creates a control-flow graph.
|
||||
fn to_control_flow_graph<'a>(&self, code: &'a [Ins]) -> Graph<BasicBlock<'a>, ()> {
|
||||
if code.is_empty() {
|
||||
return Graph::new();
|
||||
}
|
||||
// Walk set cuts and create basic blocks.
|
||||
let mut graph = Graph::new();
|
||||
let mut node_by_addr = BTreeMap::<u32, NodeIndex<DefaultIx>>::new();
|
||||
let mut block_start: CodeIdx = code[0].addr / 4;
|
||||
for cut in &self.cuts {
|
||||
if *cut > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..*cut, code)),
|
||||
);
|
||||
}
|
||||
block_start = *cut;
|
||||
}
|
||||
// Last block.
|
||||
let func_end: CodeIdx = (code.last().unwrap().addr / 4) + 1;
|
||||
if func_end > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..func_end, code)),
|
||||
);
|
||||
}
|
||||
// Walk set of branches and connect graph.
|
||||
for branch in &self.branches {
|
||||
let src_node_idx = match node_by_addr.range(..branch.0 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[src_node_idx].range.contains(&branch.0));
|
||||
let dst_node_idx = match node_by_addr.range(..branch.1 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[dst_node_idx].range.contains(&branch.1));
|
||||
graph.add_edge(src_node_idx, dst_node_idx, ());
|
||||
}
|
||||
// Walk blocks and re-connect nodes that were split off.
|
||||
for (src_node_idx, dst_node_idx) in node_by_addr.values().tuple_windows::<(_, _)>() {
|
||||
// Get pairs of two blocks as a sliding window.
|
||||
let src_block: &BasicBlock = &graph[*src_node_idx];
|
||||
let dst_block: &BasicBlock = &graph[*dst_node_idx];
|
||||
assert_eq!(src_block.range.end, dst_block.range.start);
|
||||
// Get last instruction of left block.
|
||||
// Unless it's an unconditional branch, we can connect the blocks.
|
||||
let last_ins = src_block.code.last().unwrap();
|
||||
if last_ins.code == Opcode::BLR
|
||||
|| (last_ins.op == Opcode::B && last_ins.bo() == 0b10100)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Execution can continue past the last instruction of a block,
|
||||
// so re-connect two blocks that were split off.
|
||||
if !graph.contains_edge(*src_node_idx, *dst_node_idx) {
|
||||
graph.add_edge(*src_node_idx, *dst_node_idx, ());
|
||||
}
|
||||
}
|
||||
graph
|
||||
}
|
||||
}
|
||||
|
||||
fn is_conditional_branch(ins: &Ins) -> bool {
|
||||
match ins.op {
|
||||
Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => (),
|
||||
_ => return false,
|
||||
};
|
||||
// Check whether bits "branch always".
|
||||
ins.bo() & 0b10100 != 0b10100
|
||||
}
|
||||
|
||||
/// A run of consecutive instructions that forms one node of the control-flow graph.
struct BasicBlock<'a> {
|
||||
/// Half-open range of code indexes (instruction address / 4) covered by the block.
range: Range<CodeIdx>,
|
||||
/// The block's instructions, borrowed from the complete instruction listing.
code: &'a [Ins],
|
||||
}
|
||||
|
||||
impl<'a> BasicBlock<'a> {
|
||||
fn from_code_slice(range: Range<CodeIdx>, complete_code: &'a [Ins]) -> BasicBlock {
|
||||
let start_idx = complete_code.first().unwrap().addr / 4;
|
||||
assert!(start_idx <= range.start);
|
||||
let offset = (range.start - start_idx) as usize;
|
||||
let code = &complete_code[offset..(offset + (range.len() as usize))];
|
||||
BasicBlock { range, code }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"{:0>#8x}..{:0>#8x}",
|
||||
self.range.start * 4,
|
||||
self.range.end * 4
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Debug for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"// {:0>#8x}..{:0>#8x}",
|
||||
self.range.start * 4,
|
||||
self.range.end * 4
|
||||
)?;
|
||||
for ins in self.code {
|
||||
writeln!(f, "{}", ins.to_string())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use ppc750cl::{Ins, Opcode};
|
||||
|
||||
/// The instruction address divided by four.
///
/// Used as the unit for block ranges, cuts and branch endpoints.
|
||||
pub type CodeIdx = u32;
|
||||
|
||||
/// Block boundaries and branch edges collected from a linear instruction listing.
pub struct BasicSlices {
|
||||
/// The indexes separating instructions into basic blocks.
|
||||
/// Used to create a list of consecutive basic blocks.
|
||||
pub cuts: BTreeSet<CodeIdx>,
|
||||
/// The possible branches from one instruction to another,
/// stored as `(source index, destination index)`.
|
||||
/// Used to link together basic blocks into a directed graph.
|
||||
pub branches: HashSet<(CodeIdx, CodeIdx)>,
|
||||
}
|
||||
|
||||
impl BasicSlices {
|
||||
/// Computes basic slices from instructions.
|
||||
pub fn from_code(code: &[Ins]) -> Self {
|
||||
let mut cuts = BTreeSet::<CodeIdx>::new();
|
||||
let mut branches = HashSet::<(CodeIdx, CodeIdx)>::new();
|
||||
for ins in code {
|
||||
let cur_index = ins.addr / 4;
|
||||
let is_control_flow_ins = match ins.op {
|
||||
// Direct branches are control flow instructions if they don't save the link register.
|
||||
// If they do, we encountered a function call.
|
||||
Opcode::B | Opcode::Bc => ins.lk() == 0,
|
||||
// Switch table
|
||||
Opcode::Bcctr => panic!("jump tables not supported yet"),
|
||||
_ => false,
|
||||
};
|
||||
if !is_control_flow_ins {
|
||||
continue;
|
||||
}
|
||||
// We encountered some kind of control flow instruction.
|
||||
if ins.code != Opcode::BLR {
|
||||
// There's a possibility that branch can be taken.
|
||||
// Branch destinations are always the first instruction of a block.
|
||||
// Thus, we also found the end of another block.
|
||||
let new_index = ins.branch_dest().unwrap() / 4;
|
||||
cuts.insert(new_index);
|
||||
branches.insert((cur_index, new_index));
|
||||
}
|
||||
if is_conditional_branch(ins) {
|
||||
// There's a possibility that branch is not taken.
|
||||
// End block anyways.
|
||||
cuts.insert(cur_index + 1);
|
||||
branches.insert((cur_index, cur_index + 1));
|
||||
}
|
||||
}
|
||||
Self { cuts, branches }
|
||||
}
|
||||
}
|
||||
|
||||
fn is_conditional_branch(ins: &Ins) -> bool {
|
||||
match ins.op {
|
||||
Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => (),
|
||||
_ => return false,
|
||||
};
|
||||
// Check whether bits "branch always".
|
||||
ins.bo() & 0b10100 != 0b10100
|
||||
}
|
Loading…
Reference in New Issue