Implement control-flow graph detection (#12)
Closes #9 Closes #10 * add flow graph crate * update authors * implement control flow graph analysis * detect if program flow falls through the end of a block * relax itertools version constraint
This commit is contained in:
parent
4fc2d32e02
commit
e7a257ac64
|
@ -4,3 +4,5 @@
|
|||
*.profraw
|
||||
env/
|
||||
lib/
|
||||
*.bin
|
||||
*.png
|
||||
|
|
|
@ -2,6 +2,26 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.1"
|
||||
|
@ -20,6 +40,21 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"strsim",
|
||||
"textwrap",
|
||||
"unicode-width",
|
||||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.20"
|
||||
|
@ -30,6 +65,18 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.16"
|
||||
|
@ -52,6 +99,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
|
@ -61,6 +114,16 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indoc"
|
||||
version = "0.3.6"
|
||||
|
@ -115,6 +178,15 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.99"
|
||||
|
@ -180,6 +252,15 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parse_int"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "15f96500577cfa0a3bad8a88a3c4daa66684828af2e7d349012fa7fc3c725f0c"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "0.1.18"
|
||||
|
@ -199,6 +280,16 @@ dependencies = [
|
|||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f"
|
||||
dependencies = [
|
||||
"fixedbitset",
|
||||
"indexmap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppc750cl"
|
||||
version = "0.1.1"
|
||||
|
@ -207,6 +298,17 @@ dependencies = [
|
|||
"ppc750cl-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppc750cl-flow-graph"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"itertools",
|
||||
"parse_int",
|
||||
"petgraph",
|
||||
"ppc750cl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppc750cl-fuzz"
|
||||
version = "0.1.1"
|
||||
|
@ -392,6 +494,12 @@ version = "1.6.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.74"
|
||||
|
@ -403,6 +511,21 @@ dependencies = [
|
|||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
|
@ -415,6 +538,12 @@ version = "0.1.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.9.0+wasi-snapshot-preview1"
|
||||
|
|
|
@ -4,5 +4,6 @@ members = [
|
|||
"disasm-py",
|
||||
"macros",
|
||||
"fuzz",
|
||||
"flow-graph",
|
||||
"rand",
|
||||
]
|
||||
|
|
|
@ -238,6 +238,8 @@ isa! {
|
|||
}
|
||||
|
||||
impl Opcode {
|
||||
pub const BLR: u32 = 0x4c000020;
|
||||
|
||||
pub fn from_code(x: u32) -> Self {
|
||||
let op = match bits(x, 0..6) {
|
||||
0b000011 => Opcode::Twi,
|
||||
|
|
|
@ -152,6 +152,24 @@ impl Ins {
|
|||
ins_ufield!(ps_l, u8, 17..20);
|
||||
ins_ifield!(ps_d, 20..32);
|
||||
|
||||
pub fn branch_offset(&self) -> Option<i32> {
|
||||
match self.op {
|
||||
Opcode::B => Some(self.li()),
|
||||
Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => Some(self.bd()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn branch_dest(&self) -> Option<u32> {
|
||||
self.branch_offset().and_then(|offset| {
|
||||
if offset < 0 {
|
||||
self.addr.checked_sub((-offset) as u32)
|
||||
} else {
|
||||
self.addr.checked_add(offset as u32)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn write_asm_mtfsfi<F, W>(&self, out: &mut F) -> std::io::Result<()>
|
||||
where
|
||||
F: AsmFormatter<W>,
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "ppc750cl-flow-graph"
|
||||
version = "0.1.1"
|
||||
edition = "2018"
|
||||
authors = ["riidefi <riidefi@rii.dev>", "Richard Patel <me@terorie.dev>"]
|
||||
license = "GPL-3.0-or-later"
|
||||
description = "Control flow graph analysis for PowerPC 750CL"
|
||||
repository = "https://github.com/terorie/ppc750cl"
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33"
|
||||
itertools = "0.10"
|
||||
parse_int = "0.5"
|
||||
petgraph = "0.6"
|
||||
ppc750cl = { version = "0.1.1", path = "../disasm" }
|
|
@ -0,0 +1,210 @@
|
|||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::fmt::{Debug, Display, Formatter};
|
||||
use std::ops::Range;
|
||||
|
||||
use clap::clap_app;
|
||||
use petgraph::dot::{Config as DotConfig, Dot};
|
||||
use petgraph::graph::{DefaultIx, NodeIndex};
|
||||
use petgraph::Graph;
|
||||
|
||||
use itertools::Itertools;
|
||||
use ppc750cl::{disasm_iter, Ins, Opcode};
|
||||
|
||||
fn main() {
|
||||
let matches = clap_app!(myapp =>
|
||||
(version: "1.0")
|
||||
(about: "Control flow graph analysis for PowerPC 750CL")
|
||||
(@arg ADDR: --addr +required +takes_value "Address")
|
||||
(@arg INPUT: +required "Binary input file")
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let addr = matches.value_of("ADDR").unwrap();
|
||||
let addr: u32 = ::parse_int::parse(addr).expect("Invalid address flag");
|
||||
|
||||
let file_path = matches.value_of("INPUT").unwrap();
|
||||
let bytes = std::fs::read(file_path).expect("Failed to read file");
|
||||
|
||||
// Create control flow graph.
|
||||
let ins_list: Vec<Ins> = disasm_iter(&bytes, addr).collect();
|
||||
let basic_slices = BasicSlices::from_code(&ins_list);
|
||||
let graph = basic_slices.to_control_flow_graph(&ins_list);
|
||||
|
||||
// Output graphviz.
|
||||
let graphviz = Dot::with_config(
|
||||
&graph,
|
||||
&[DotConfig::EdgeNoLabel, DotConfig::GraphContentOnly],
|
||||
);
|
||||
println!(
|
||||
concat!(
|
||||
"digraph func {{\n",
|
||||
"node [shape=record fontname=Arial];\n",
|
||||
"{:?}\n",
|
||||
"}}"
|
||||
),
|
||||
graphviz
|
||||
);
|
||||
}
|
||||
|
||||
/// Index of an instruction: its address divided by four.
type CodeIdx = u32;
|
||||
|
||||
struct BasicSlices {
|
||||
/// The indexes separating instructions into basic blocks.
|
||||
/// Used to create a list of consecutive basic blocks.
|
||||
cuts: BTreeSet<CodeIdx>,
|
||||
/// The possible branches from one instruction to another.
|
||||
/// Used to link together basic blocks into a directed graph.
|
||||
branches: HashSet<(CodeIdx, CodeIdx)>,
|
||||
}
|
||||
|
||||
impl BasicSlices {
|
||||
/// Computes basic slices from instructions.
|
||||
fn from_code(code: &[Ins]) -> Self {
|
||||
let mut cuts = BTreeSet::<CodeIdx>::new();
|
||||
let mut branches = HashSet::<(CodeIdx, CodeIdx)>::new();
|
||||
for ins in code {
|
||||
let cur_index = ins.addr / 4;
|
||||
let is_control_flow_ins = match ins.op {
|
||||
// Direct branches are control flow instructions if they don't save the link register.
|
||||
// If they do, we encountered a function call.
|
||||
Opcode::B | Opcode::Bc => ins.lk() == 0,
|
||||
// Switch table
|
||||
Opcode::Bcctr => panic!("jump tables not supported yet"),
|
||||
_ => false,
|
||||
};
|
||||
if !is_control_flow_ins {
|
||||
continue;
|
||||
}
|
||||
// We encountered some kind of control flow instruction.
|
||||
if ins.code != Opcode::BLR {
|
||||
// There's a possibility that branch can be taken.
|
||||
// Branch destinations are always the first instruction of a block.
|
||||
// Thus, we also found the end of another block.
|
||||
let new_index = ins.branch_dest().unwrap() / 4;
|
||||
cuts.insert(new_index);
|
||||
branches.insert((cur_index, new_index));
|
||||
}
|
||||
if is_conditional_branch(ins) {
|
||||
// There's a possibility that branch is not taken.
|
||||
// End block anyways.
|
||||
cuts.insert(cur_index + 1);
|
||||
branches.insert((cur_index, cur_index + 1));
|
||||
}
|
||||
}
|
||||
Self { cuts, branches }
|
||||
}
|
||||
|
||||
/// Creates a control-flow graph.
|
||||
fn to_control_flow_graph<'a>(&self, code: &'a [Ins]) -> Graph<BasicBlock<'a>, ()> {
|
||||
if code.is_empty() {
|
||||
return Graph::new();
|
||||
}
|
||||
// Walk set cuts and create basic blocks.
|
||||
let mut graph = Graph::new();
|
||||
let mut node_by_addr = BTreeMap::<u32, NodeIndex<DefaultIx>>::new();
|
||||
let mut block_start: CodeIdx = code[0].addr / 4;
|
||||
for cut in &self.cuts {
|
||||
if *cut > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..*cut, code)),
|
||||
);
|
||||
}
|
||||
block_start = *cut;
|
||||
}
|
||||
// Last block.
|
||||
let func_end: CodeIdx = (code.last().unwrap().addr / 4) + 1;
|
||||
if func_end > block_start {
|
||||
node_by_addr.insert(
|
||||
block_start,
|
||||
graph.add_node(BasicBlock::from_code_slice(block_start..func_end, code)),
|
||||
);
|
||||
}
|
||||
// Walk set of branches and connect graph.
|
||||
for branch in &self.branches {
|
||||
let src_node_idx = match node_by_addr.range(..branch.0 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[src_node_idx].range.contains(&branch.0));
|
||||
let dst_node_idx = match node_by_addr.range(..branch.1 + 1).last() {
|
||||
None => continue,
|
||||
Some(idx) => *idx.1,
|
||||
};
|
||||
debug_assert!(graph[dst_node_idx].range.contains(&branch.1));
|
||||
graph.add_edge(src_node_idx, dst_node_idx, ());
|
||||
}
|
||||
// Walk blocks and re-connect nodes that were split off.
|
||||
for (src_node_idx, dst_node_idx) in node_by_addr.values().tuple_windows::<(_, _)>() {
|
||||
// Get pairs of two blocks as a sliding window.
|
||||
let src_block: &BasicBlock = &graph[*src_node_idx];
|
||||
let dst_block: &BasicBlock = &graph[*dst_node_idx];
|
||||
assert_eq!(src_block.range.end, dst_block.range.start);
|
||||
// Get last instruction of left block.
|
||||
// Unless it's an unconditional branch, we can connect the blocks.
|
||||
let last_ins = src_block.code.last().unwrap();
|
||||
if last_ins.code == Opcode::BLR
|
||||
|| (last_ins.op == Opcode::B && last_ins.bo() == 0b10100)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Execution can continue past the last instruction of a block,
|
||||
// so re-connect two blocks that were split off.
|
||||
if !graph.contains_edge(*src_node_idx, *dst_node_idx) {
|
||||
graph.add_edge(*src_node_idx, *dst_node_idx, ());
|
||||
}
|
||||
}
|
||||
graph
|
||||
}
|
||||
}
|
||||
|
||||
fn is_conditional_branch(ins: &Ins) -> bool {
|
||||
match ins.op {
|
||||
Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => (),
|
||||
_ => return false,
|
||||
};
|
||||
// Check whether bits "branch always".
|
||||
ins.bo() & 0b10100 != 0b10100
|
||||
}
|
||||
|
||||
struct BasicBlock<'a> {
|
||||
range: Range<CodeIdx>,
|
||||
code: &'a [Ins],
|
||||
}
|
||||
|
||||
impl<'a> BasicBlock<'a> {
|
||||
fn from_code_slice(range: Range<CodeIdx>, complete_code: &'a [Ins]) -> BasicBlock {
|
||||
let start_idx = complete_code.first().unwrap().addr / 4;
|
||||
assert!(start_idx <= range.start);
|
||||
let offset = (range.start - start_idx) as usize;
|
||||
let code = &complete_code[offset..(offset + (range.len() as usize))];
|
||||
BasicBlock { range, code }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"{:0>#8x}..{:0>#8x}",
|
||||
self.range.start * 4,
|
||||
self.range.end * 4
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Debug for BasicBlock<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"// {:0>#8x}..{:0>#8x}",
|
||||
self.range.start * 4,
|
||||
self.range.end * 4
|
||||
)?;
|
||||
for ins in self.code {
|
||||
writeln!(f, "{}", ins.to_string())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue