From 54318363761aab62636c52e571cf2e6c4a53be67 Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Wed, 6 Apr 2022 15:13:55 +0200 Subject: [PATCH] switch from macro to ahead-of-time generated code --- Cargo.lock | 3 +- Cargo.toml | 2 +- disasm-py/Cargo.toml | 2 +- disasm/Cargo.toml | 3 +- disasm/src/lib.rs | 16 +-- dol/Cargo.toml | 2 +- flow-graph/Cargo.toml | 2 +- fuzz/Cargo.toml | 2 +- {macros => genisa}/Cargo.toml | 12 +- macros/src/isa.rs => genisa/src/main.rs | 131 ++++++++++++------ macros/src/isa.yaml => isa.yaml | 0 macros/src/lib.rs | 66 --------- macros/src/writer.rs | 170 ------------------------ rand/Cargo.toml | 2 +- 14 files changed, 108 insertions(+), 305 deletions(-) rename {macros => genisa}/Cargo.toml (55%) rename macros/src/isa.rs => genisa/src/main.rs (80%) rename macros/src/isa.yaml => isa.yaml (100%) delete mode 100644 macros/src/lib.rs delete mode 100644 macros/src/writer.rs diff --git a/Cargo.lock b/Cargo.lock index 8936313..26c4152 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -325,7 +325,6 @@ name = "ppc750cl" version = "0.1.1" dependencies = [ "num-traits", - "ppc750cl-macros", "serde", ] @@ -350,7 +349,7 @@ dependencies = [ ] [[package]] -name = "ppc750cl-macros" +name = "ppc750cl-genisa" version = "0.1.1" dependencies = [ "itertools", diff --git a/Cargo.toml b/Cargo.toml index f70da3f..71aa117 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,8 @@ members = [ "disasm", "disasm-py", "dol", - "macros", "fuzz", + "genisa", "flow-graph", "rand", ] diff --git a/disasm-py/Cargo.toml b/disasm-py/Cargo.toml index 08b4a9d..c05ce1a 100644 --- a/disasm-py/Cargo.toml +++ b/disasm-py/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ppc750cl-py" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" description = "Python bindings for PowerPC 750CL Disassembler" diff --git a/disasm/Cargo.toml b/disasm/Cargo.toml index 7ad009f..ce29169 100644 --- a/disasm/Cargo.toml +++ b/disasm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ppc750cl" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" description = "Disassembler for PowerPC 750CL" @@ -10,5 +10,4 @@ repository = "https://github.com/terorie/ppc750cl" [dependencies] num-traits = "0.2" -ppc750cl-macros = { path = "../macros", version = "0.1.1" } serde = "1.0" diff --git a/disasm/src/lib.rs b/disasm/src/lib.rs index 8fd07ef..5e38e6c 100644 --- a/disasm/src/lib.rs +++ b/disasm/src/lib.rs @@ -3,19 +3,20 @@ use std::ops::Range; use num_traits::{AsPrimitive, PrimInt}; -use ppc750cl_macros::{fields, ins_impl, opcodes}; - pub use crate::iter::{disasm_iter, DisasmIterator}; pub mod formatter; mod iter; +mod generated; +use generated::*; pub mod prelude { pub use crate::formatter::FormattedIns; - pub use crate::Field; pub use crate::Field::*; pub use crate::Ins; pub use crate::Opcode::*; + pub use crate::Modifiers; + pub use crate::SimplifiedIns; pub use crate::{ Bit, BranchDest, CRBit, CRField, Offset, OpaqueU, Simm, Uimm, FPR, GPR, GQR, SPR, SR, }; @@ -165,9 +166,6 @@ impl Display for Argument { } } -// Generate the Field enum and impls. -fields!(); - impl Field { pub fn argument(&self) -> Option { match self { @@ -237,10 +235,6 @@ impl std::fmt::Display for Modifiers { } } -// Generate the Opcode enum and impls. -// TODO This could be made more readable with a derive over an empty enum. -opcodes!(); - impl Opcode { /// Detects the opcode of a machine code instruction. pub fn detect(code: u32) -> Self { @@ -320,8 +314,6 @@ impl Ins { } } -ins_impl!(); - /// A simplified PowerPC 750CL instruction. pub struct SimplifiedIns { pub ins: Ins, diff --git a/dol/Cargo.toml b/dol/Cargo.toml index 195d6a4..5b3a995 100644 --- a/dol/Cargo.toml +++ b/dol/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "dol" version = "0.1.0" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" description = "Deserializer for the DOL executable format" diff --git a/flow-graph/Cargo.toml b/flow-graph/Cargo.toml index 0afa423..18d12bd 100644 --- a/flow-graph/Cargo.toml +++ b/flow-graph/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ppc750cl-flow-graph" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["riidefi ", "Richard Patel "] license = "GPL-3.0-or-later" description = "Control flow graph analysis for PowerPC 750CL" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index fdcb8d7..c4d86af 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ppc750cl-fuzz" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" description = "Complete fuzzer for ppc750cl" diff --git a/macros/Cargo.toml b/genisa/Cargo.toml similarity index 55% rename from macros/Cargo.toml rename to genisa/Cargo.toml index d48bdce..ddbb750 100644 --- a/macros/Cargo.toml +++ b/genisa/Cargo.toml @@ -1,19 +1,17 @@ [package] -name = "ppc750cl-macros" +name = "ppc750cl-genisa" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" -description = "Auxiliary procedural macros for the ppc750cl disassembler" +description = "Rust code generator for ppc750cl" repository = "https://github.com/terorie/ppc750cl" -[lib] -proc-macro = true [dependencies] -itertools = "0.10.1" +itertools = "0.10" proc-macro2 = "1.0" quote = "1.0" +syn = { version = "1.0" } serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.8" -syn = { version = "1.0", features = ["full", "parsing"] } diff --git a/macros/src/isa.rs b/genisa/src/main.rs similarity index 80% rename from macros/src/isa.rs rename to genisa/src/main.rs index 213b307..f657e54 100644 --- a/macros/src/isa.rs +++ b/genisa/src/main.rs @@ -1,6 +1,8 @@ use std::collections::HashMap; -use std::iter::FromIterator; +use std::fs::File; +use std::io::Write; use std::ops::Range; +use std::process::{Command, Stdio}; use itertools::Itertools; use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree}; @@ -14,13 +16,58 @@ macro_rules! token_stream { }; } +fn main() { + if let Err(err) = _main() { + eprintln!("{}", err); + std::process::exit(1); + } +} + +fn _main() -> Result<()> { + let isa = load_isa()?; + + let mut unformatted_code = Vec::::new(); + writeln!(&mut unformatted_code, "{}", quote! { + use crate::prelude::*; + })?; + writeln!(&mut unformatted_code, "{}", isa.gen_opcode_enum()?)?; + writeln!(&mut unformatted_code, "{}", isa.gen_field_enum()?)?; + writeln!(&mut unformatted_code, "{}", isa.gen_ins_impl()?)?; + + let formatted_code = rustfmt(unformatted_code); + File::create("./disasm/src/generated.rs")? + .write_all(&formatted_code)?; + + Ok(()) +} + +fn rustfmt(code: Vec) -> Vec { + let mut rustfmt = Command::new("rustfmt") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .expect("failed to spawn rustfmt"); + + let mut stdin = rustfmt.stdin.take().unwrap(); + std::thread::spawn(move || { + let _ = stdin.write_all(&code); + }); + + let rustfmt_res = rustfmt.wait_with_output().expect("failed to run rustfmt"); + if !rustfmt_res.status.success() { + panic!("rustfmt failed"); + } + + rustfmt_res.stdout +} + #[derive(Default)] pub(crate) struct BitRange(Range); impl<'de> Deserialize<'de> for BitRange { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, { let range_str: String = Deserialize::deserialize(deserializer)?; if let Some((start_str, stop_str)) = range_str.split_once("..") { @@ -115,7 +162,7 @@ pub(crate) struct Opcode { } impl Opcode { - fn variant_identifier(&self) -> syn::Result { + fn variant_identifier(&self) -> Result { to_rust_variant(&self.name) } } @@ -153,19 +200,25 @@ pub(crate) struct Isa { mnemonics: Vec, } +fn load_isa() -> Result { + let yaml_file = File::open("isa.yaml")?; + let isa: Isa = serde_yaml::from_reader(yaml_file)?; + Ok(isa) +} + impl Isa { - pub(crate) fn gen_opcode_enum(&self) -> syn::Result { + fn gen_opcode_enum(&self) -> Result { // Create enum variants. let enum_variants = self .opcodes .iter() - .map(|opcode| -> syn::Result { + .map(|opcode| -> Result { let ident = opcode.variant_identifier()?; Ok(quote! { #ident, }) }) - .try_collect::, syn::Error>()?; + .try_collect::, Error>()?; let enum_variants = token_stream!(enum_variants); // Create functions. @@ -187,7 +240,7 @@ impl Isa { Ok(opcode_enum) } - fn gen_mnemonic_fn(&self) -> syn::Result { + fn gen_mnemonic_fn(&self) -> Result { // Create match arms. let match_arms = self .opcodes @@ -200,11 +253,11 @@ impl Isa { Opcode::#variant => #literal, }) }) - .try_collect::, syn::Error>()?; + .try_collect::, Error>()?; let match_arms = token_stream!(match_arms); // Create final function. let mnemonic_fn = quote! { - fn _mnemonic(self) -> &'static str { + pub(crate) fn _mnemonic(self) -> &'static str { match self { Opcode::Illegal => "", #match_arms @@ -214,7 +267,7 @@ impl Isa { Ok(mnemonic_fn) } - pub(crate) fn gen_opcode_detect(&self) -> syn::Result { + pub(crate) fn gen_opcode_detect(&self) -> Result { // Generate if chain. let if_chain = self .opcodes @@ -231,11 +284,11 @@ impl Isa { } }) }) - .try_collect::, syn::Error>()?; + .try_collect::, Error>()?; let if_chain = token_stream!(if_chain); // Generate function. let func = quote! { - fn _detect(code: u32) -> Self { + pub(crate) fn _detect(code: u32) -> Self { #if_chain Opcode::Illegal } @@ -243,7 +296,7 @@ impl Isa { Ok(func) } - pub(crate) fn gen_field_enum(&self) -> syn::Result { + pub(crate) fn gen_field_enum(&self) -> Result { // Create enum variants. let mut enum_variants = Vec::new(); for field in &self.fields { @@ -255,6 +308,7 @@ impl Isa { // Create final enum. let field_enum = quote! { + #[allow(non_camel_case_types)] #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Field { #enum_variants @@ -263,7 +317,8 @@ impl Isa { Ok(field_enum) } - pub(crate) fn gen_ins_impl(&self) -> syn::Result { + + pub(crate) fn gen_ins_impl(&self) -> Result { // Map fields by name. let mut field_by_name = HashMap::::new(); for field in &self.fields { @@ -280,7 +335,7 @@ impl Isa { let mut fields = Vec::new(); for arg in &opcode.args { let field: &Field = field_by_name.get(arg).ok_or_else(|| { - syn::Error::new(Span::call_site(), format!("undefined field {}", arg)) + Error::from(format!("undefined field {}", arg)) })?; let variant = field.construct_variant_self(); fields.extend(quote! { #variant, }) @@ -301,10 +356,7 @@ impl Isa { "AA" => quote! { m.aa = self.bit(30); }, "LK" => quote! { m.lk = self.bit(31); }, _ => { - return Err(syn::Error::new( - Span::call_site(), - format!("unsupported modifier {}", modifier), - )) + return Err(format!("unsupported modifier {}", modifier).into()); } }) } @@ -315,10 +367,7 @@ impl Isa { "AA" => quote! { m.aa = true; }, "LK" => quote! { m.lk = true; }, _ => { - return Err(syn::Error::new( - Span::call_site(), - format!("unsupported modifier {}", modifier), - )) + return Err(format!("unsupported modifier {}", modifier).into()); } }) } @@ -393,39 +442,38 @@ impl Isa { // Generate final fields function. let ins_impl = quote! { impl Ins { - fn _fields(&self) -> Vec { + pub(crate) fn _fields(&self) -> Vec { match self.op { Opcode::Illegal => vec![], #field_match_arms - _ => todo!() } } - fn _defs(&self) -> Vec { + #[allow(unused_mut)] + pub(crate) fn _defs(&self) -> Vec { match self.op { Opcode::Illegal => vec![], #def_match_arms - _ => todo!() } } - fn _uses(&self) -> Vec { + #[allow(unused_mut)] + pub(crate) fn _uses(&self) -> Vec { match self.op { Opcode::Illegal => vec![], #use_match_arms - _ => todo!() } } - fn _modifiers(&self) -> Modifiers { + #[allow(unused_mut)] + pub(crate) fn _modifiers(&self) -> Modifiers { match self.op { Opcode::Illegal => Modifiers::default(), #modifier_match_arms - _ => todo!() } } - fn _simplified(self) -> SimplifiedIns { + pub(crate) fn _simplified(self) -> SimplifiedIns { SimplifiedIns { mnemonic: self.op.mnemonic(), modifiers: self._modifiers(), @@ -444,15 +492,15 @@ fn to_rust_ident(key: &str) -> TokenTree { TokenTree::Ident(Ident::new(&key.replace(".", "_"), Span::call_site())) } -/// Converts the given key into a struct variant key. -fn to_rust_variant(key: &str) -> syn::Result { +/// Converts the given key into an enum variant key. +fn to_rust_variant(key: &str) -> Result { Ok(TokenTree::Ident(Ident::new( - &to_rust_variant_str(key).map_err(|e| syn::Error::new(Span::call_site(), e))?, + &to_rust_variant_str(key)?, Span::call_site(), ))) } -fn to_rust_variant_str(key: &str) -> Result { +fn to_rust_variant_str(key: &str) -> Result { let mut s = String::new(); let mut chars = key.chars(); loop { @@ -464,7 +512,7 @@ fn to_rust_variant_str(key: &str) -> Result { s.push(match c { 'a'..='z' => c.to_ascii_uppercase(), 'A'..='Z' => c, - _ => return Err(format!("invalid identifier: {}", key)), + _ => return Err(format!("invalid identifier: {}", key).into()), }); loop { let c = match chars.next() { @@ -478,8 +526,11 @@ fn to_rust_variant_str(key: &str) -> Result { s.push('_'); break; } - _ => return Err(format!("invalid character in opcode name: {}", key)), + _ => return Err(format!("invalid character in opcode name: {}", key).into()), } } } } + +type Error = Box; +type Result = std::result::Result; diff --git a/macros/src/isa.yaml b/isa.yaml similarity index 100% rename from macros/src/isa.yaml rename to isa.yaml diff --git a/macros/src/lib.rs b/macros/src/lib.rs deleted file mode 100644 index de4997e..0000000 --- a/macros/src/lib.rs +++ /dev/null @@ -1,66 +0,0 @@ -#![feature(proc_macro_span, proc_macro_def_site)] - -mod isa; -//mod writer; - -use std::fs::File; - -use proc_macro::Span; - -use crate::isa::Isa; - -#[proc_macro] -pub fn opcodes(_: proc_macro::TokenStream) -> proc_macro::TokenStream { - let isa = match load_isa() { - Ok(v) => v, - Err(err) => return err, - }; - match isa.gen_opcode_enum() { - Ok(v) => v.into(), - Err(err) => proc_macro::TokenStream::from(err.to_compile_error()), - } -} - -#[proc_macro] -pub fn fields(_: proc_macro::TokenStream) -> proc_macro::TokenStream { - let isa = match load_isa() { - Ok(v) => v, - Err(err) => return err, - }; - match isa.gen_field_enum() { - Ok(v) => v.into(), - Err(err) => proc_macro::TokenStream::from(err.to_compile_error()), - } -} - -#[proc_macro] -pub fn ins_impl(_: proc_macro::TokenStream) -> proc_macro::TokenStream { - let isa = match load_isa() { - Ok(v) => v, - Err(err) => return err, - }; - match isa.gen_ins_impl() { - Ok(v) => v.into(), - Err(err) => proc_macro::TokenStream::from(err.to_compile_error()), - } -} - -fn load_isa() -> Result { - _load_isa().map_err(|err| { - proc_macro::TokenStream::from( - syn::Error::new(Span::def_site().into(), err).to_compile_error(), - ) - }) -} - -fn _load_isa() -> Result> { - // Figure out YAML path. - let def_site = Span::def_site(); - let rust_path = def_site.source_file().path(); - let yaml_path = rust_path.parent().unwrap().join("isa.yaml"); - // Open and deserialize YAML file. - let yaml_file = File::open(yaml_path).map_err(|e| syn::Error::new(def_site.into(), e))?; - let isa: Isa = - serde_yaml::from_reader(yaml_file).map_err(|e| syn::Error::new(def_site.into(), e))?; - Ok(isa) -} diff --git a/macros/src/writer.rs b/macros/src/writer.rs deleted file mode 100644 index dc96c3f..0000000 --- a/macros/src/writer.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::iter::FromIterator; -use std::string::ToString; - -use proc_macro2::{Delimiter, Group, TokenStream}; -use quote::quote; -use quote::ToTokens; -use syn::parse::{Parse, ParseStream}; -use syn::punctuated::Punctuated; -use syn::spanned::Spanned; -use syn::{Expr, ExprLit, ExprPath, Ident}; - -struct Arguments { - formatter: Expr, - ins: Expr, - args: Punctuated, -} - -impl Parse for Arguments { - fn parse(input: ParseStream) -> syn::Result { - let formatter = input.parse()?; - input.parse::()?; - let ins = input.parse()?; - input.parse::()?; - let content; - syn::braced!(content in input); - let args = Punctuated::parse_terminated(&content)?; - Ok(Self { - formatter, - ins, - args, - }) - } -} - -/// A single part of an instruction. -/// -/// Examples: -/// ```ignore -/// (op.mnemonic, rc, oe) -> mnemonic; -/// d -> fpr; -/// ``` -struct Argument { - sources: Vec, - target: Ident, -} - -impl Parse for Argument { - fn parse(input: ParseStream) -> syn::Result { - // Parse source part. - let lookahead = input.lookahead1(); - let sources; - if lookahead.peek(syn::token::Paren) { - // Parse multiple if we found a parenthesis. - let content; - syn::parenthesized!(content in input); - sources = content - .parse_terminated::(Expr::parse)? - .into_iter() - .collect(); - } else if lookahead.peek(syn::LitStr) || lookahead.peek(syn::LitInt) { - let expr = input.parse::()?.into(); - sources = vec![expr]; - } else { - let expr = input.parse::()?.into(); - sources = vec![expr]; - } - input.parse::()?; - let target = input.parse()?; - Ok(Self { sources, target }) - } -} - -impl Arguments { - fn format_mnemonic(&self) -> Vec { - let arg = &self.args[0]; - assert!(!arg.sources.is_empty()); - // Print the mnemonic. - let mut calls = vec![self.format_call(&arg.target, self.ins_call(&arg.sources[0]))]; - // Print any mnemonic suffixes. - for src in arg.sources.iter().skip(1) { - calls.push(self.format_call( - &Ident::new(&src.into_token_stream().to_string(), src.span()), - self.ins_call(src), - )); - } - calls - } - - fn format_call(&self, method_arg: &Ident, args: TokenStream) -> TokenStream { - let arg_str = method_arg.to_string(); - let method_name = format!("write_{}", arg_str); - let method_name = Ident::new(&method_name, method_arg.span()); - let formatter = &self.formatter; - if arg_str == "branch_target" { - quote!(#formatter.write_branch_target(#args, self.addr)?) - } else { - quote!(#formatter.#method_name(#args)?) - } - } - - fn ins_call(&self, call: &Expr) -> TokenStream { - match call { - Expr::Lit(_) => call.to_token_stream(), - _ => { - let ins = &self.ins; - quote!(#ins.#call()) - } - } - } -} - -pub(crate) fn write_asm(input: TokenStream) -> syn::Result { - let arguments: Arguments = syn::parse2(input)?; - assert!(!arguments.args.is_empty()); - - // Create a list of calls to execute. - let mut calls = Vec::::new(); - calls.extend(arguments.format_mnemonic()); - let mut offset_open = false; - for (i, arg) in arguments.args.iter().enumerate().skip(1) { - // Separate operands from one another unless the last one was an offset. - if !offset_open { - if i == 1 { - calls.push( - arguments - .format_call(&Ident::new("opcode_separator", arg.target.span()), quote!()), - ); - } else { - calls.push(arguments.format_call( - &Ident::new("operand_separator", arg.target.span()), - quote!(), - )); - } - } - // Arguments to out.write_x(...); - let format_args = arg.sources.iter().map(|src| arguments.ins_call(src)); - let format_args_punct: Punctuated = - Punctuated::from_iter(format_args); - // Create call. - if arg.target.to_string().starts_with("offset") { - // Offsets are a special case since we need to call close afterwards. - if offset_open { - return Err(syn::Error::new( - arg.target.span(), - "two consecutive offset arguments", - )); - } - calls.push(arguments.format_call( - &Ident::new(&(arg.target.to_string() + "_open"), arg.target.span()), - format_args_punct.to_token_stream(), - )); - offset_open = true; - } else { - calls.push(arguments.format_call(&arg.target, format_args_punct.to_token_stream())); - if offset_open { - calls.push( - arguments.format_call(&Ident::new("offset_close", arg.target.span()), quote!()), - ); - offset_open = false; - } - } - } - - // Wrap calls in a block returning Ok(()). - calls.push(quote!(std::io::Result::Ok(()))); - let statements = Punctuated::::from_iter(calls); - let tokens = Group::new(Delimiter::Brace, statements.to_token_stream()); - - Ok(tokens.to_token_stream()) -} diff --git a/rand/Cargo.toml b/rand/Cargo.toml index 1002969..7cb8f52 100644 --- a/rand/Cargo.toml +++ b/rand/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ppc750cl-rand" version = "0.1.1" -edition = "2018" +edition = "2021" authors = ["Richard Patel "] license = "GPL-3.0-or-later" description = "Generate random PowerPC 750CL instructions"