switch from macro to ahead-of-time generated code

Richard Patel 2022-04-06 15:13:55 +02:00
parent 171666e6fc
commit 5431836376
14 changed files with 108 additions and 305 deletions
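Context for the diff below: the ppc750cl-macros proc-macro crate becomes a ppc750cl-genisa generator binary. disasm/src/lib.rs now declares an ordinary generated module and pulls Opcode, Field and the Ins helper impls from disasm/src/generated.rs, which the generator writes ahead of time. Judging from the hard-coded output path in genisa/src/main.rs, the generator is presumably rerun from the workspace root (e.g. cargo run -p ppc750cl-genisa) whenever isa.yaml changes; the generated file itself does not appear among the files changed here.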

Cargo.lock (generated)
View File

@ -325,7 +325,6 @@ name = "ppc750cl"
version = "0.1.1"
dependencies = [
"num-traits",
"ppc750cl-macros",
"serde",
]
@ -350,7 +349,7 @@ dependencies = [
]
[[package]]
name = "ppc750cl-macros"
name = "ppc750cl-genisa"
version = "0.1.1"
dependencies = [
"itertools",

View File

@ -3,8 +3,8 @@ members = [
"disasm",
"disasm-py",
"dol",
"macros",
"fuzz",
"genisa",
"flow-graph",
"rand",
]

View File

@ -1,7 +1,7 @@
[package]
name = "ppc750cl-py"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Python bindings for PowerPC 750CL Disassembler"

View File

@ -1,7 +1,7 @@
[package]
name = "ppc750cl"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Disassembler for PowerPC 750CL"
@ -10,5 +10,4 @@ repository = "https://github.com/terorie/ppc750cl"
[dependencies]
num-traits = "0.2"
ppc750cl-macros = { path = "../macros", version = "0.1.1" }
serde = "1.0"

View File

@ -3,19 +3,20 @@ use std::ops::Range;
use num_traits::{AsPrimitive, PrimInt};
use ppc750cl_macros::{fields, ins_impl, opcodes};
pub use crate::iter::{disasm_iter, DisasmIterator};
pub mod formatter;
mod iter;
mod generated;
use generated::*;
pub mod prelude {
pub use crate::formatter::FormattedIns;
pub use crate::Field;
pub use crate::Field::*;
pub use crate::Ins;
pub use crate::Opcode::*;
pub use crate::Modifiers;
pub use crate::SimplifiedIns;
pub use crate::{
Bit, BranchDest, CRBit, CRField, Offset, OpaqueU, Simm, Uimm, FPR, GPR, GQR, SPR, SR,
};
@ -165,9 +166,6 @@ impl Display for Argument {
}
}
// Generate the Field enum and impls.
fields!();
impl Field {
pub fn argument(&self) -> Option<Argument> {
match self {
@ -237,10 +235,6 @@ impl std::fmt::Display for Modifiers {
}
}
// Generate the Opcode enum and impls.
// TODO This could be made more readable with a derive over an empty enum.
opcodes!();
impl Opcode {
/// Detects the opcode of a machine code instruction.
pub fn detect(code: u32) -> Self {
@ -320,8 +314,6 @@ impl Ins {
}
}
ins_impl!();
/// A simplified PowerPC 750CL instruction.
pub struct SimplifiedIns {
pub ins: Ins,
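
With the fields!(), opcodes!() and ins_impl!() invocations gone, the items they expanded to now have to come from the generated module instead. A minimal, self-contained sketch of the kind of code genisa writes into disasm/src/generated.rs follows; the Addi variant, its bit pattern and the derives are illustrative assumptions, not the real generated file.

    // Hedged sketch of an ahead-of-time generated module (hypothetical contents).
    mod generated {
        #[derive(Debug, Copy, Clone, Eq, PartialEq)]
        pub enum Opcode {
            Illegal,
            Addi, // hypothetical; the real enum has one variant per opcode in isa.yaml
        }

        impl Opcode {
            pub(crate) fn _mnemonic(self) -> &'static str {
                match self {
                    Opcode::Illegal => "<illegal>",
                    Opcode::Addi => "addi",
                }
            }

            pub(crate) fn _detect(code: u32) -> Self {
                // Hypothetical primary-opcode check (bits 0..6 equal to 14 for addi).
                if code >> 26 == 14 {
                    return Opcode::Addi;
                }
                Opcode::Illegal
            }
        }
    }

    use generated::*;

    fn main() {
        let op = Opcode::_detect(0x3860_0001); // encodes an addi form (li r3, 1)
        assert_eq!(op._mnemonic(), "addi");
    }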

View File

@ -1,7 +1,7 @@
[package]
name = "dol"
version = "0.1.0"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Deserializer for the DOL executable format"

View File

@ -1,7 +1,7 @@
[package]
name = "ppc750cl-flow-graph"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["riidefi <riidefi@rii.dev>", "Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Control flow graph analysis for PowerPC 750CL"

View File

@ -1,7 +1,7 @@
[package]
name = "ppc750cl-fuzz"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Complete fuzzer for ppc750cl"

View File

@ -1,19 +1,17 @@
[package]
name = "ppc750cl-macros"
name = "ppc750cl-genisa"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Auxiliary procedural macros for the ppc750cl disassembler"
description = "Rust code generator for ppc750cl"
repository = "https://github.com/terorie/ppc750cl"
[lib]
proc-macro = true
[dependencies]
itertools = "0.10.1"
itertools = "0.10"
proc-macro2 = "1.0"
quote = "1.0"
syn = { version = "1.0" }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.8"
syn = { version = "1.0", features = ["full", "parsing"] }

View File

@ -1,6 +1,8 @@
use std::collections::HashMap;
use std::iter::FromIterator;
use std::fs::File;
use std::io::Write;
use std::ops::Range;
use std::process::{Command, Stdio};
use itertools::Itertools;
use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree};
@ -14,13 +16,58 @@ macro_rules! token_stream {
};
}
fn main() {
if let Err(err) = _main() {
eprintln!("{}", err);
std::process::exit(1);
}
}
fn _main() -> Result<()> {
let isa = load_isa()?;
let mut unformatted_code = Vec::<u8>::new();
writeln!(&mut unformatted_code, "{}", quote! {
use crate::prelude::*;
})?;
writeln!(&mut unformatted_code, "{}", isa.gen_opcode_enum()?)?;
writeln!(&mut unformatted_code, "{}", isa.gen_field_enum()?)?;
writeln!(&mut unformatted_code, "{}", isa.gen_ins_impl()?)?;
let formatted_code = rustfmt(unformatted_code);
File::create("./disasm/src/generated.rs")?
.write_all(&formatted_code)?;
Ok(())
}
fn rustfmt(code: Vec<u8>) -> Vec<u8> {
let mut rustfmt = Command::new("rustfmt")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("failed to spawn rustfmt");
let mut stdin = rustfmt.stdin.take().unwrap();
std::thread::spawn(move || {
let _ = stdin.write_all(&code);
});
let rustfmt_res = rustfmt.wait_with_output().expect("failed to run rustfmt");
if !rustfmt_res.status.success() {
panic!("rustfmt failed");
}
rustfmt_res.stdout
}
#[derive(Default)]
pub(crate) struct BitRange(Range<u8>);
impl<'de> Deserialize<'de> for BitRange {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let range_str: String = Deserialize::deserialize(deserializer)?;
if let Some((start_str, stop_str)) = range_str.split_once("..") {
@ -115,7 +162,7 @@ pub(crate) struct Opcode {
}
impl Opcode {
fn variant_identifier(&self) -> syn::Result<TokenTree> {
fn variant_identifier(&self) -> Result<TokenTree> {
to_rust_variant(&self.name)
}
}
@ -153,19 +200,25 @@ pub(crate) struct Isa {
mnemonics: Vec<Mnemonic>,
}
fn load_isa() -> Result<Isa> {
let yaml_file = File::open("isa.yaml")?;
let isa: Isa = serde_yaml::from_reader(yaml_file)?;
Ok(isa)
}
impl Isa {
pub(crate) fn gen_opcode_enum(&self) -> syn::Result<TokenStream> {
fn gen_opcode_enum(&self) -> Result<TokenStream> {
// Create enum variants.
let enum_variants = self
.opcodes
.iter()
.map(|opcode| -> syn::Result<TokenStream> {
.map(|opcode| -> Result<TokenStream> {
let ident = opcode.variant_identifier()?;
Ok(quote! {
#ident,
})
})
.try_collect::<TokenStream, Vec<TokenStream>, syn::Error>()?;
.try_collect::<TokenStream, Vec<TokenStream>, Error>()?;
let enum_variants = token_stream!(enum_variants);
// Create functions.
@ -187,7 +240,7 @@ impl Isa {
Ok(opcode_enum)
}
fn gen_mnemonic_fn(&self) -> syn::Result<TokenStream> {
fn gen_mnemonic_fn(&self) -> Result<TokenStream> {
// Create match arms.
let match_arms = self
.opcodes
@ -200,11 +253,11 @@ impl Isa {
Opcode::#variant => #literal,
})
})
.try_collect::<TokenStream, Vec<TokenStream>, syn::Error>()?;
.try_collect::<TokenStream, Vec<TokenStream>, Error>()?;
let match_arms = token_stream!(match_arms);
// Create final function.
let mnemonic_fn = quote! {
fn _mnemonic(self) -> &'static str {
pub(crate) fn _mnemonic(self) -> &'static str {
match self {
Opcode::Illegal => "<illegal>",
#match_arms
@ -214,7 +267,7 @@ impl Isa {
Ok(mnemonic_fn)
}
pub(crate) fn gen_opcode_detect(&self) -> syn::Result<TokenStream> {
pub(crate) fn gen_opcode_detect(&self) -> Result<TokenStream> {
// Generate if chain.
let if_chain = self
.opcodes
@ -231,11 +284,11 @@ impl Isa {
}
})
})
.try_collect::<TokenStream, Vec<TokenStream>, syn::Error>()?;
.try_collect::<TokenStream, Vec<TokenStream>, Error>()?;
let if_chain = token_stream!(if_chain);
// Generate function.
let func = quote! {
fn _detect(code: u32) -> Self {
pub(crate) fn _detect(code: u32) -> Self {
#if_chain
Opcode::Illegal
}
@ -243,7 +296,7 @@ impl Isa {
Ok(func)
}
pub(crate) fn gen_field_enum(&self) -> syn::Result<TokenStream> {
pub(crate) fn gen_field_enum(&self) -> Result<TokenStream> {
// Create enum variants.
let mut enum_variants = Vec::new();
for field in &self.fields {
@ -255,6 +308,7 @@ impl Isa {
// Create final enum.
let field_enum = quote! {
#[allow(non_camel_case_types)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Field {
#enum_variants
@ -263,7 +317,8 @@ impl Isa {
Ok(field_enum)
}
pub(crate) fn gen_ins_impl(&self) -> syn::Result<TokenStream> {
pub(crate) fn gen_ins_impl(&self) -> Result<TokenStream> {
// Map fields by name.
let mut field_by_name = HashMap::<String, &Field>::new();
for field in &self.fields {
@ -280,7 +335,7 @@ impl Isa {
let mut fields = Vec::new();
for arg in &opcode.args {
let field: &Field = field_by_name.get(arg).ok_or_else(|| {
syn::Error::new(Span::call_site(), format!("undefined field {}", arg))
Error::from(format!("undefined field {}", arg))
})?;
let variant = field.construct_variant_self();
fields.extend(quote! { #variant, })
@ -301,10 +356,7 @@ impl Isa {
"AA" => quote! { m.aa = self.bit(30); },
"LK" => quote! { m.lk = self.bit(31); },
_ => {
return Err(syn::Error::new(
Span::call_site(),
format!("unsupported modifier {}", modifier),
))
return Err(format!("unsupported modifier {}", modifier).into());
}
})
}
@ -315,10 +367,7 @@ impl Isa {
"AA" => quote! { m.aa = true; },
"LK" => quote! { m.lk = true; },
_ => {
return Err(syn::Error::new(
Span::call_site(),
format!("unsupported modifier {}", modifier),
))
return Err(format!("unsupported modifier {}", modifier).into());
}
})
}
@ -393,39 +442,38 @@ impl Isa {
// Generate final fields function.
let ins_impl = quote! {
impl Ins {
fn _fields(&self) -> Vec<Field> {
pub(crate) fn _fields(&self) -> Vec<Field> {
match self.op {
Opcode::Illegal => vec![],
#field_match_arms
_ => todo!()
}
}
fn _defs(&self) -> Vec<Field> {
#[allow(unused_mut)]
pub(crate) fn _defs(&self) -> Vec<Field> {
match self.op {
Opcode::Illegal => vec![],
#def_match_arms
_ => todo!()
}
}
fn _uses(&self) -> Vec<Field> {
#[allow(unused_mut)]
pub(crate) fn _uses(&self) -> Vec<Field> {
match self.op {
Opcode::Illegal => vec![],
#use_match_arms
_ => todo!()
}
}
fn _modifiers(&self) -> Modifiers {
#[allow(unused_mut)]
pub(crate) fn _modifiers(&self) -> Modifiers {
match self.op {
Opcode::Illegal => Modifiers::default(),
#modifier_match_arms
_ => todo!()
}
}
fn _simplified(self) -> SimplifiedIns {
pub(crate) fn _simplified(self) -> SimplifiedIns {
SimplifiedIns {
mnemonic: self.op.mnemonic(),
modifiers: self._modifiers(),
@ -444,15 +492,15 @@ fn to_rust_ident(key: &str) -> TokenTree {
TokenTree::Ident(Ident::new(&key.replace(".", "_"), Span::call_site()))
}
/// Converts the given key into a struct variant key.
fn to_rust_variant(key: &str) -> syn::Result<TokenTree> {
/// Converts the given key into an enum variant key.
fn to_rust_variant(key: &str) -> Result<TokenTree> {
Ok(TokenTree::Ident(Ident::new(
&to_rust_variant_str(key).map_err(|e| syn::Error::new(Span::call_site(), e))?,
&to_rust_variant_str(key)?,
Span::call_site(),
)))
}
fn to_rust_variant_str(key: &str) -> Result<String, String> {
fn to_rust_variant_str(key: &str) -> Result<String> {
let mut s = String::new();
let mut chars = key.chars();
loop {
@ -464,7 +512,7 @@ fn to_rust_variant_str(key: &str) -> Result<String, String> {
s.push(match c {
'a'..='z' => c.to_ascii_uppercase(),
'A'..='Z' => c,
_ => return Err(format!("invalid identifier: {}", key)),
_ => return Err(format!("invalid identifier: {}", key).into()),
});
loop {
let c = match chars.next() {
@ -478,8 +526,11 @@ fn to_rust_variant_str(key: &str) -> Result<String, String> {
s.push('_');
break;
}
_ => return Err(format!("invalid character in opcode name: {}", key)),
_ => return Err(format!("invalid character in opcode name: {}", key).into()),
}
}
}
}
type Error = Box<dyn std::error::Error>;
type Result<T> = std::result::Result<T, Error>;
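
The Deserialize impl for BitRange above is cut off by the hunk boundary right after the split_once call. For reference, here is a self-contained sketch of that style of range parsing ("6..11" strings into a Range<u8>), reconstructed under assumptions; the error wording and the bare-number fallback are guesses, not the project's exact code.

    use std::ops::Range;

    use serde::{Deserialize, Deserializer};

    #[derive(Debug, PartialEq)]
    struct BitRange(Range<u8>);

    impl<'de> Deserialize<'de> for BitRange {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // The ISA definition spells bit ranges as strings like "6..11".
            let range_str: String = Deserialize::deserialize(deserializer)?;
            let parse_bit = |s: &str| -> Result<u8, D::Error> {
                s.parse()
                    .map_err(|e| serde::de::Error::custom(format!("bad bit index {:?}: {}", s, e)))
            };
            if let Some((start_str, stop_str)) = range_str.split_once("..") {
                let (start, stop) = (parse_bit(start_str)?, parse_bit(stop_str)?);
                Ok(BitRange(start..stop))
            } else {
                // Assumed fallback: a bare number such as "30" names a single bit.
                let bit = parse_bit(&range_str)?;
                Ok(BitRange(bit..bit + 1))
            }
        }
    }

    fn main() {
        let bits: BitRange = serde_yaml::from_str("\"6..11\"").unwrap();
        assert_eq!(bits, BitRange(6..11));
    }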

View File

@ -1,66 +0,0 @@
#![feature(proc_macro_span, proc_macro_def_site)]
mod isa;
//mod writer;
use std::fs::File;
use proc_macro::Span;
use crate::isa::Isa;
#[proc_macro]
pub fn opcodes(_: proc_macro::TokenStream) -> proc_macro::TokenStream {
let isa = match load_isa() {
Ok(v) => v,
Err(err) => return err,
};
match isa.gen_opcode_enum() {
Ok(v) => v.into(),
Err(err) => proc_macro::TokenStream::from(err.to_compile_error()),
}
}
#[proc_macro]
pub fn fields(_: proc_macro::TokenStream) -> proc_macro::TokenStream {
let isa = match load_isa() {
Ok(v) => v,
Err(err) => return err,
};
match isa.gen_field_enum() {
Ok(v) => v.into(),
Err(err) => proc_macro::TokenStream::from(err.to_compile_error()),
}
}
#[proc_macro]
pub fn ins_impl(_: proc_macro::TokenStream) -> proc_macro::TokenStream {
let isa = match load_isa() {
Ok(v) => v,
Err(err) => return err,
};
match isa.gen_ins_impl() {
Ok(v) => v.into(),
Err(err) => proc_macro::TokenStream::from(err.to_compile_error()),
}
}
fn load_isa() -> Result<Isa, proc_macro::TokenStream> {
_load_isa().map_err(|err| {
proc_macro::TokenStream::from(
syn::Error::new(Span::def_site().into(), err).to_compile_error(),
)
})
}
fn _load_isa() -> Result<Isa, Box<dyn std::error::Error>> {
// Figure out YAML path.
let def_site = Span::def_site();
let rust_path = def_site.source_file().path();
let yaml_path = rust_path.parent().unwrap().join("isa.yaml");
// Open and deserialize YAML file.
let yaml_file = File::open(yaml_path).map_err(|e| syn::Error::new(def_site.into(), e))?;
let isa: Isa =
serde_yaml::from_reader(yaml_file).map_err(|e| syn::Error::new(def_site.into(), e))?;
Ok(isa)
}
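
Worth noting: the deleted proc-macro entry point needed the unstable proc_macro_span and proc_macro_def_site features mainly to locate isa.yaml relative to the macro's source file, tying the build to nightly Rust; the ahead-of-time generator simply opens isa.yaml from the current working directory (see load_isa above) and has no such requirement.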

View File

@ -1,170 +0,0 @@
use std::iter::FromIterator;
use std::string::ToString;
use proc_macro2::{Delimiter, Group, TokenStream};
use quote::quote;
use quote::ToTokens;
use syn::parse::{Parse, ParseStream};
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::{Expr, ExprLit, ExprPath, Ident};
struct Arguments {
formatter: Expr,
ins: Expr,
args: Punctuated<Argument, syn::token::Semi>,
}
impl Parse for Arguments {
fn parse(input: ParseStream) -> syn::Result<Self> {
let formatter = input.parse()?;
input.parse::<syn::token::Comma>()?;
let ins = input.parse()?;
input.parse::<syn::token::FatArrow>()?;
let content;
syn::braced!(content in input);
let args = Punctuated::parse_terminated(&content)?;
Ok(Self {
formatter,
ins,
args,
})
}
}
/// A single part of an instruction.
///
/// Examples:
/// ```ignore
/// (op.mnemonic, rc, oe) -> mnemonic;
/// d -> fpr;
/// ```
struct Argument {
sources: Vec<Expr>,
target: Ident,
}
impl Parse for Argument {
fn parse(input: ParseStream) -> syn::Result<Self> {
// Parse source part.
let lookahead = input.lookahead1();
let sources;
if lookahead.peek(syn::token::Paren) {
// Parse multiple if we found a parenthesis.
let content;
syn::parenthesized!(content in input);
sources = content
.parse_terminated::<Expr, syn::token::Comma>(Expr::parse)?
.into_iter()
.collect();
} else if lookahead.peek(syn::LitStr) || lookahead.peek(syn::LitInt) {
let expr = input.parse::<ExprLit>()?.into();
sources = vec![expr];
} else {
let expr = input.parse::<ExprPath>()?.into();
sources = vec![expr];
}
input.parse::<syn::token::Colon>()?;
let target = input.parse()?;
Ok(Self { sources, target })
}
}
impl Arguments {
fn format_mnemonic(&self) -> Vec<TokenStream> {
let arg = &self.args[0];
assert!(!arg.sources.is_empty());
// Print the mnemonic.
let mut calls = vec![self.format_call(&arg.target, self.ins_call(&arg.sources[0]))];
// Print any mnemonic suffixes.
for src in arg.sources.iter().skip(1) {
calls.push(self.format_call(
&Ident::new(&src.into_token_stream().to_string(), src.span()),
self.ins_call(src),
));
}
calls
}
fn format_call(&self, method_arg: &Ident, args: TokenStream) -> TokenStream {
let arg_str = method_arg.to_string();
let method_name = format!("write_{}", arg_str);
let method_name = Ident::new(&method_name, method_arg.span());
let formatter = &self.formatter;
if arg_str == "branch_target" {
quote!(#formatter.write_branch_target(#args, self.addr)?)
} else {
quote!(#formatter.#method_name(#args)?)
}
}
fn ins_call(&self, call: &Expr) -> TokenStream {
match call {
Expr::Lit(_) => call.to_token_stream(),
_ => {
let ins = &self.ins;
quote!(#ins.#call())
}
}
}
}
pub(crate) fn write_asm(input: TokenStream) -> syn::Result<TokenStream> {
let arguments: Arguments = syn::parse2(input)?;
assert!(!arguments.args.is_empty());
// Create a list of calls to execute.
let mut calls = Vec::<TokenStream>::new();
calls.extend(arguments.format_mnemonic());
let mut offset_open = false;
for (i, arg) in arguments.args.iter().enumerate().skip(1) {
// Separate operands from one another unless the last one was an offset.
if !offset_open {
if i == 1 {
calls.push(
arguments
.format_call(&Ident::new("opcode_separator", arg.target.span()), quote!()),
);
} else {
calls.push(arguments.format_call(
&Ident::new("operand_separator", arg.target.span()),
quote!(),
));
}
}
// Arguments to out.write_x(...);
let format_args = arg.sources.iter().map(|src| arguments.ins_call(src));
let format_args_punct: Punctuated<TokenStream, syn::token::Comma> =
Punctuated::from_iter(format_args);
// Create call.
if arg.target.to_string().starts_with("offset") {
// Offsets are a special case since we need to call close afterwards.
if offset_open {
return Err(syn::Error::new(
arg.target.span(),
"two consecutive offset arguments",
));
}
calls.push(arguments.format_call(
&Ident::new(&(arg.target.to_string() + "_open"), arg.target.span()),
format_args_punct.to_token_stream(),
));
offset_open = true;
} else {
calls.push(arguments.format_call(&arg.target, format_args_punct.to_token_stream()));
if offset_open {
calls.push(
arguments.format_call(&Ident::new("offset_close", arg.target.span()), quote!()),
);
offset_open = false;
}
}
}
// Wrap calls in a block returning Ok(()).
calls.push(quote!(std::io::Result::Ok(())));
let statements = Punctuated::<TokenStream, syn::token::Semi>::from_iter(calls);
let tokens = Group::new(Delimiter::Brace, statements.to_token_stream());
Ok(tokens.to_token_stream())
}

View File

@ -1,7 +1,7 @@
[package]
name = "ppc750cl-rand"
version = "0.1.1"
edition = "2018"
edition = "2021"
authors = ["Richard Patel <me@terorie.dev>"]
license = "GPL-3.0-or-later"
description = "Generate random PowerPC 750CL instructions"