diff --git a/.gitignore b/.gitignore index 48ca60a..541365f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ *.profraw env/ lib/ +*.bin +*.png diff --git a/Cargo.lock b/Cargo.lock index c8b38c6..313f209 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,26 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.0.1" @@ -20,6 +40,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "ctor" version = "0.1.20" @@ -30,6 +65,18 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "fixedbitset" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e" + [[package]] name = "getrandom" version = "0.1.16" @@ -52,6 +99,12 @@ dependencies = [ "syn", ] +[[package]] +name = "hashbrown" +version = "0.11.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -61,6 +114,16 @@ dependencies = [ "libc", ] +[[package]] +name = "indexmap" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "indoc" version = "0.3.6" @@ -115,6 +178,15 @@ dependencies = [ "syn", ] +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", +] + [[package]] name = "libc" version = "0.2.99" @@ -180,6 +252,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "parse_int" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f96500577cfa0a3bad8a88a3c4daa66684828af2e7d349012fa7fc3c725f0c" +dependencies = [ + "num-traits", +] + [[package]] name = "paste" version = "0.1.18" @@ -199,6 +280,16 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "petgraph" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "ppc750cl" version = "0.1.1" @@ -207,6 +298,17 @@ dependencies = [ "ppc750cl-macros", ] +[[package]] +name = "ppc750cl-flow-graph" +version = "0.1.1" +dependencies = [ + "clap", + "itertools", + "parse_int", + "petgraph", + "ppc750cl", +] + [[package]] name = "ppc750cl-fuzz" version = "0.1.1" @@ -392,6 +494,12 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" 
+[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "syn" version = "1.0.74" @@ -403,6 +511,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + [[package]] name = "unicode-xid" version = "0.2.2" @@ -415,6 +538,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index fafcda0..c42daae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,5 +4,6 @@ members = [ "disasm-py", "macros", "fuzz", + "flow-graph", "rand", ] diff --git a/disasm/src/isa.rs b/disasm/src/isa.rs index 5e47223..dd646a7 100644 --- a/disasm/src/isa.rs +++ b/disasm/src/isa.rs @@ -238,6 +238,8 @@ isa! 
{
 }
 
 impl Opcode {
+    pub const BLR: u32 = 0x4e800020; // `blr`: `bclr` with BO=0b10100, BI=0, LK=0
+
     pub fn from_code(x: u32) -> Self {
         let op = match bits(x, 0..6) {
             0b000011 => Opcode::Twi,
diff --git a/disasm/src/lib.rs b/disasm/src/lib.rs
index 4d8e9a4..b373b39 100644
--- a/disasm/src/lib.rs
+++ b/disasm/src/lib.rs
@@ -152,6 +152,40 @@ impl Ins {
     ins_ufield!(ps_l, u8, 17..20);
     ins_ifield!(ps_d, 20..32);
 
+    /// Returns the signed displacement encoded in a direct branch.
+    ///
+    /// Only `b` (LI field) and `bc` (BD field) carry a displacement. `bcctr` and
+    /// `bclr` branch to the address held in CTR or LR, which is not part of the
+    /// instruction word, so they yield `None` like any non-branch instruction.
+    pub fn branch_offset(&self) -> Option<i32> {
+        match self.op {
+            Opcode::B => Some(self.li()),
+            Opcode::Bc => Some(self.bd()),
+            _ => None,
+        }
+    }
+
+    /// Returns the statically known destination address of a direct branch.
+    ///
+    /// The displacement is relative to the address of this instruction unless
+    /// the AA bit is set, in which case it is the absolute target itself.
+    /// Returns `None` if there is no encoded displacement or if the relative
+    /// target lies outside of the 32-bit address space.
+    pub fn branch_dest(&self) -> Option<u32> {
+        // AA is the second-lowest bit of the instruction word for `b` and `bc`.
+        let is_absolute = self.code & 0b10 != 0;
+        self.branch_offset().and_then(|offset| {
+            if is_absolute {
+                // Sign extension wraps negative values to the top of memory.
+                Some(offset as u32)
+            } else if offset < 0 {
+                self.addr.checked_sub((-offset) as u32)
+            } else {
+                self.addr.checked_add(offset as u32)
+            }
+        })
+    }
+
     fn write_asm_mtfsfi<F>(&self, out: &mut F) -> std::io::Result<()>
     where
         F: AsmFormatter,
diff --git a/flow-graph/Cargo.toml b/flow-graph/Cargo.toml
new file mode 100644
index 0000000..37426ea
--- /dev/null
+++ b/flow-graph/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ppc750cl-flow-graph"
+version = "0.1.1"
+edition = "2018"
+authors = ["riidefi ", "Richard Patel "]
+license = "GPL-3.0-or-later"
+description = "Control flow graph analysis for PowerPC 750CL"
+repository = "https://github.com/terorie/ppc750cl"
+
+[dependencies]
+clap = "2.33"
+itertools = "0.10"
+parse_int = "0.5"
+petgraph = "0.6"
+ppc750cl = { version = "0.1.1", path = "../disasm" }
diff --git a/flow-graph/src/main.rs b/flow-graph/src/main.rs
new file mode 100644
index 0000000..542e420
--- /dev/null
+++ b/flow-graph/src/main.rs
@@ -0,0 +1,250 @@
+use std::collections::{BTreeMap, BTreeSet, HashSet};
+use std::fmt::{Debug, Display, Formatter};
+use std::ops::Range;
+
+use clap::clap_app;
+use petgraph::dot::{Config as DotConfig, Dot};
+use petgraph::graph::{DefaultIx, NodeIndex};
+use petgraph::Graph;
+
+use itertools::Itertools;
+use ppc750cl::{disasm_iter, Ins, Opcode};
+
+/// Disassembles the input file and prints its control flow graph in Graphviz format.
+fn main() {
+    let matches = clap_app!(myapp =>
+        (version: "1.0")
+        (about: "Control flow graph analysis for PowerPC 750CL")
+        (@arg ADDR: --addr +required +takes_value "Address")
+        (@arg INPUT: +required "Binary input file")
+    )
+    .get_matches();
+
+    // Both arguments are declared `+required`, so clap has already rejected their absence.
+    let addr = matches.value_of("ADDR").unwrap();
+    let addr: u32 = ::parse_int::parse(addr).expect("Invalid address flag");
+
+    let file_path = matches.value_of("INPUT").unwrap();
+    let bytes = std::fs::read(file_path).expect("Failed to read file");
+
+    // Create control flow graph.
+    let ins_list: Vec<Ins> = disasm_iter(&bytes, addr).collect();
+    let basic_slices = BasicSlices::from_code(&ins_list);
+    let graph = basic_slices.to_control_flow_graph(&ins_list);
+
+    // Output graphviz.
+    let graphviz = Dot::with_config(
+        &graph,
+        &[DotConfig::EdgeNoLabel, DotConfig::GraphContentOnly],
+    );
+    println!(
+        concat!(
+            "digraph func {{\n",
+            "node [shape=record fontname=Arial];\n",
+            "{:?}\n",
+            "}}"
+        ),
+        graphviz
+    );
+}
+
+/// The instruction address divided by four.
+type CodeIdx = u32;
+
+/// Block boundaries and branch edges discovered by scanning a run of instructions.
+struct BasicSlices {
+    /// The indexes separating instructions into basic blocks.
+    /// Used to create a list of consecutive basic blocks.
+    cuts: BTreeSet<CodeIdx>,
+    /// The possible branches from one instruction to another.
+    /// Used to link together basic blocks into a directed graph.
+    branches: HashSet<(CodeIdx, CodeIdx)>,
+}
+
+impl BasicSlices {
+    /// Computes basic slices from instructions.
+    ///
+    /// # Panics
+    ///
+    /// Panics on `bcctr` without link, since jump tables are not supported yet.
+    fn from_code(code: &[Ins]) -> Self {
+        let mut cuts = BTreeSet::<CodeIdx>::new();
+        let mut branches = HashSet::<(CodeIdx, CodeIdx)>::new();
+        for ins in code {
+            let cur_index = ins.addr / 4;
+            let is_control_flow_ins = match ins.op {
+                // Direct branches are control flow instructions if they don't save the link register.
+                // If they do, we encountered a function call.
+                Opcode::B | Opcode::Bc => ins.lk() == 0,
+                // Switch table. With the link bit set (`bctrl`) it is an indirect call instead.
+                Opcode::Bcctr if ins.lk() == 0 => panic!("jump tables not supported yet"),
+                _ => false,
+            };
+            if !is_control_flow_ins {
+                continue;
+            }
+            // We encountered a direct branch, so there's a possibility that it is taken.
+            // Branch destinations are always the first instruction of a block.
+            // Thus, we also found the end of another block.
+            // A relative target beyond the 32-bit address space has no destination to record.
+            if let Some(dest) = ins.branch_dest() {
+                let new_index = dest / 4;
+                cuts.insert(new_index);
+                branches.insert((cur_index, new_index));
+            }
+            if is_conditional_branch(ins) {
+                // There's a possibility that branch is not taken.
+                // End block anyways.
+                cuts.insert(cur_index + 1);
+                branches.insert((cur_index, cur_index + 1));
+            }
+        }
+        Self { cuts, branches }
+    }
+
+    /// Creates a control-flow graph.
+    ///
+    /// Cuts and branches that point outside of `code` (e.g. tail calls into
+    /// another function) are ignored, as there is no block to attach them to.
+    fn to_control_flow_graph<'a>(&self, code: &'a [Ins]) -> Graph<BasicBlock<'a>, ()> {
+        let (first, last) = match (code.first(), code.last()) {
+            (Some(first), Some(last)) => (first, last),
+            _ => return Graph::new(),
+        };
+        // Half-open range of instruction indexes covered by `code`.
+        let func_start: CodeIdx = first.addr / 4;
+        let func_end: CodeIdx = (last.addr / 4) + 1;
+        let func_range = func_start..func_end;
+        // Walk set cuts and create basic blocks.
+        let mut graph = Graph::new();
+        let mut node_by_addr = BTreeMap::<CodeIdx, NodeIndex<DefaultIx>>::new();
+        let mut block_start = func_start;
+        // The set iterates in ascending order. A cut at `func_start` would yield an empty block.
+        let inner_cuts = self
+            .cuts
+            .iter()
+            .copied()
+            .filter(|cut| *cut > func_start && *cut < func_end);
+        for cut in inner_cuts {
+            node_by_addr.insert(
+                block_start,
+                graph.add_node(BasicBlock::from_code_slice(block_start..cut, code)),
+            );
+            block_start = cut;
+        }
+        // Last block.
+        if func_end > block_start {
+            node_by_addr.insert(
+                block_start,
+                graph.add_node(BasicBlock::from_code_slice(block_start..func_end, code)),
+            );
+        }
+        // Walk set of branches and connect graph.
+        for &(src, dst) in &self.branches {
+            // Branches leaving the disassembled code have no block to connect to.
+            if !func_range.contains(&src) || !func_range.contains(&dst) {
+                continue;
+            }
+            let src_node_idx = match node_by_addr.range(..=src).next_back() {
+                None => continue,
+                Some((_, node_idx)) => *node_idx,
+            };
+            debug_assert!(graph[src_node_idx].range.contains(&src));
+            let dst_node_idx = match node_by_addr.range(..=dst).next_back() {
+                None => continue,
+                Some((_, node_idx)) => *node_idx,
+            };
+            debug_assert!(graph[dst_node_idx].range.contains(&dst));
+            // `update_edge` does not create a parallel edge if the blocks are linked already.
+            graph.update_edge(src_node_idx, dst_node_idx, ());
+        }
+        // Walk blocks and re-connect nodes that were split off.
+        for (src_node_idx, dst_node_idx) in node_by_addr.values().tuple_windows::<(_, _)>() {
+            // Get pairs of two blocks as a sliding window.
+            let src_block: &BasicBlock = &graph[*src_node_idx];
+            let dst_block: &BasicBlock = &graph[*dst_node_idx];
+            assert_eq!(src_block.range.end, dst_block.range.start);
+            // Get last instruction of left block.
+            // Unless it's an unconditional branch, we can connect the blocks.
+            let last_ins = src_block.code.last().unwrap();
+            if is_unconditional_jump(last_ins) {
+                continue;
+            }
+            // Execution can continue past the last instruction of a block,
+            // so re-connect two blocks that were split off.
+            graph.update_edge(*src_node_idx, *dst_node_idx, ());
+        }
+        graph
+    }
+}
+
+/// Returns whether `ins` is a branch whose outcome depends on the CR or CTR.
+fn is_conditional_branch(ins: &Ins) -> bool {
+    match ins.op {
+        // BO = 0b1z1zz encodes "branch always"; the `z` bits are ignored, so mask them.
+        Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => ins.bo() & 0b10100 != 0b10100,
+        _ => false,
+    }
+}
+
+/// Returns whether execution can never continue at the instruction following `ins`.
+fn is_unconditional_jump(ins: &Ins) -> bool {
+    match ins.op {
+        // A branch that saves the link register is a call, which returns to the next instruction.
+        // `b` has no BO field and is always taken.
+        Opcode::B => ins.lk() == 0,
+        // Includes the plain function return `blr`.
+        Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => ins.lk() == 0 && !is_conditional_branch(ins),
+        _ => false,
+    }
+}
+
+/// A run of consecutive instructions between two cuts.
+struct BasicBlock<'a> {
+    /// Half-open range of the instruction indexes (address divided by four) of the block.
+    range: Range<CodeIdx>,
+    /// The instructions of the block.
+    code: &'a [Ins],
+}
+
+impl<'a> BasicBlock<'a> {
+    /// Creates the block covering `range` by borrowing its instructions from `complete_code`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `complete_code` is empty or does not fully contain `range`.
+    fn from_code_slice(range: Range<CodeIdx>, complete_code: &'a [Ins]) -> Self {
+        let start_idx = complete_code.first().unwrap().addr / 4;
+        assert!(start_idx <= range.start);
+        let offset = (range.start - start_idx) as usize;
+        let code = &complete_code[offset..(offset + range.len())];
+        BasicBlock { range, code }
+    }
+}
+
+impl<'a> Display for BasicBlock<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // `{:#010x}` zero-pads after the `0x` prefix; `{:0>#8x}` would pad in front of it.
+        writeln!(
+            f,
+            "{:#010x}..{:#010x}",
+            self.range.start * 4,
+            self.range.end * 4
+        )
+    }
+}
+
+impl<'a> Debug for BasicBlock<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        writeln!(
+            f,
+            "// {:#010x}..{:#010x}",
+            self.range.start * 4,
+            self.range.end * 4
+        )?;
+        for ins in self.code {
+            writeln!(f, "{}", ins.to_string())?;
+        }
+        Ok(())
+    }
+}