/// Result of scanning a name for its first run of disallowed characters.
///
/// Produced by [`split_at_illegal_char`]; turn it into a sanitised name with
/// [`SplitResult::into_valid`].
struct SplitResult<'a> {
    /// Text before the first disallowed character, or `None` when that text is empty.
    start: Option<&'a str>,
    /// Number of consecutive disallowed characters (counted as `char`s, not bytes)
    /// that directly follow `start`.
    middle: usize,
    /// Text after the disallowed run, not yet scanned; `None` when it is empty.
    end: Option<&'a str>,
}

impl<'a> SplitResult<'a> {
    /// Returns `true` when the scanned input was non-empty and contained no
    /// disallowed character, i.e. `start` already is the complete, usable name.
    pub fn is_valid(&self) -> bool {
        self.start.is_some() && self.middle == 0 && self.end.is_none()
    }

    /// Builds the sanitised name: every disallowed character is replaced by one `_`.
    ///
    /// `disallowed_tokens` must be the same set that produced `self`; it is used to
    /// keep scanning the remainder stored in `end`.
    ///
    /// Returns `Cow::Borrowed` (no allocation) when nothing had to be replaced and
    /// `Cow::Owned` otherwise. An empty input yields an owned empty string.
    pub fn into_valid(self, disallowed_tokens: &[char]) -> Cow<'a, str> {
        if let (true, Some(clean)) = (self.is_valid(), self.start) {
            return Cow::Borrowed(clean);
        }

        // Each replaced char shrinks to a single-byte '_', so the output never
        // exceeds the number of bytes still held by this result.
        let capacity =
            self.start.map_or(0, str::len) + self.middle + self.end.map_or(0, str::len);
        let mut valid = String::with_capacity(capacity);

        // Walk the remainder iteratively instead of recursing once per run, so the
        // stack depth does not depend on the shape of the input.
        let mut current = self;
        loop {
            if let Some(start) = current.start {
                valid.push_str(start);
            }
            valid.extend(std::iter::repeat('_').take(current.middle));

            match current.end {
                Some(rest) => current = split_at_illegal_char(rest, disallowed_tokens),
                None => break,
            }
        }

        Cow::Owned(valid)
    }
}

/// Splits `input` around its first run of characters contained in `disallowed_tokens`.
///
/// The returned [`SplitResult`] holds the clean prefix, the length of the disallowed
/// run in characters, and the unscanned remainder. All slicing happens on offsets
/// reported by `str` search routines, so multi-byte disallowed characters are handled
/// without panicking.
fn split_at_illegal_char<'a>(input: &'a str, disallowed_tokens: &[char]) -> SplitResult<'a> {
    let is_illegal = |c: char| disallowed_tokens.contains(&c);

    match input.find(is_illegal) {
        Some(pos) => {
            // `pos` is the byte offset of a matched char, hence a char boundary.
            let (start, tail) = input.split_at(pos);
            // Strip the whole disallowed run at once; what is left starts with a
            // legal character or is empty.
            let end = tail.trim_start_matches(is_illegal);
            let run = &tail[..tail.len() - end.len()];

            SplitResult {
                start: empty_or_some(start),
                middle: run.chars().count(),
                end: empty_or_some(end),
            }
        }
        None => SplitResult {
            start: empty_or_some(input),
            middle: 0,
            end: None,
        },
    }
}

/// Turns `name` into a string usable as an identifier.
///
/// Every character listed in `disallowed_tokens` is replaced by `_`. If the result
/// equals one of `disallowed_strs`, or its first character is not alphabetic (which
/// includes a leading `_` or digit), a `_` is prepended. A name that needs no change
/// is returned borrowed. An empty `name` yields an empty string.
fn canonicalize_name<'a>(
    name: &'a str,
    disallowed_tokens: &[char],
    disallowed_strs: &[&str],
) -> Cow<'a, str> {
    let valid = split_at_illegal_char(name, disallowed_tokens).into_valid(disallowed_tokens);

    let is_reserved = disallowed_strs.contains(&valid.as_ref());
    let bad_first_char = valid.starts_with(|c: char| !c.is_alphabetic());

    if is_reserved || bad_first_char {
        Cow::Owned(format!("_{}", &valid))
    } else {
        valid
    }
}

/// Maps the empty string to `None` and any other string to `Some(s)`.
fn empty_or_some(s: &str) -> Option<&str> {
    Some(s).filter(|s| !s.is_empty())
}
+ types: BTreeMap, +} + +pub mod rust { + use std::{borrow::Cow, collections::BTreeMap}; + + use anyhow::Context; + use itertools::Itertools; + use proc_macro2::TokenStream; + use quote::{format_ident, quote, TokenStreamExt}; + use unreal_sdk::sdk::repr::{ + Class, ClassField, ClassMethod, Enum, ObjectRef, PrimitiveType, ProcessedPackage, Sdk, + StructKind, Type, UnrealType, + }; + + use crate::split_at_illegal_char; + + const KEYWORDS: [&'static str; 51] = [ + "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", + "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", + "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe", + "use", "where", "while", "async", "await", "dyn", "abstract", "become", "box", "do", + "final", "macro", "override", "priv", "typeof", "unsized", "virtual", "yield", "try", + ]; + + const TYPES: [&'static str; 17] = [ + "bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16", "i32", + "i64", "i128", "usize", "isize", + ]; + + const WORDS: [&'static str; 68] = [ + "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", + "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", + "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe", + "use", "where", "while", "async", "await", "dyn", "abstract", "become", "box", "do", + "final", "macro", "override", "priv", "typeof", "unsized", "virtual", "yield", "try", + "bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16", "i32", + "i64", "i128", "usize", "isize", + ]; + + const CHARS: [char; 19] = [ + ' ', '?', '+', '-', ':', '/', '^', '(', ')', '[', ']', '<', '>', '&', '.', '#', '\'', '"', + '%', + ]; + + pub struct Builder { + type_name_cache: BTreeMap, + sdk: Sdk, + } + + fn canonicalize_name<'a>(name: &'a str) -> Cow<'a, str> { + let valid = 
split_at_illegal_char(name, &CHARS).into_valid(&CHARS); + if WORDS.contains(&valid.as_ref()) || valid.starts_with(|c: char| !c.is_alphabetic()) { + Cow::Owned(format!("_{}", &valid)) + } else { + valid + } + } + + impl Builder { + pub fn new(sdk: Sdk) -> Self { + let type_name_cache = sdk + .packages + .iter() + .flat_map(|(_, pkg)| { + pkg.types.values().map(|ty| { + let name = match ty { + UnrealType::Class(class) => { + format!("U{}", canonicalize_name(&class.name)) + } + UnrealType::Struct(class) => { + format!("F{}", canonicalize_name(&class.name)) + } + UnrealType::Actor(class) => { + format!("A{}", canonicalize_name(&class.name)) + } + UnrealType::Enum(class) => { + format!("E{}", canonicalize_name(&class.name)) + } + }; + + (ty.obj_ref(), name) + }) + }) + .collect::>(); + + Self { + type_name_cache, + sdk, + } + } + + fn type_name(&self, ty: &Type) -> anyhow::Result { + let type_name = match ty { + Type::Ptr(inner) | Type::Ref(inner) => { + format!("Option>", self.type_name(&inner)?) + } + Type::WeakPtr(inner) => { + format!( + "TWeakObjectPtr<{}>", + self.type_name_cache + .get(inner) + .context("type name was not cached.")? + ) + } + Type::SoftPtr(inner) => { + format!( + "TSoftObjectPtr<{}>", + self.type_name_cache + .get(inner) + .context("type name was not cached.")? + ) + } + Type::LazyPtr(inner) => { + format!( + "TLazyObjectPtr<{}>", + self.type_name_cache + .get(inner) + .context("type name was not cached.")? + ) + } + Type::AssetPtr(inner) => format!( + "TAssetPtr<{}>", + self.type_name_cache + .get(inner) + .context("type name was not cached.")? 
+ ), + Type::Array(inner) => format!("TArray<{}>", self.type_name(&inner)?), + Type::Primitive(prim) => { + format!("{prim}") + } + Type::RawArray { ty, len } => { + format!("[{}; {}]", self.type_name(&ty)?, len) + } + Type::Name => "FName".to_string(), + Type::String => "FString".to_string(), + Type::Text => "FText".to_string(), + Type::Enum { + underlying, + enum_type, + } => self + .type_name_cache + .get(enum_type) + .context("type name was not cached.")? + .clone(), + Type::Class(class) => { + format!( + "::core::option::Option<{}>", + self.type_name_cache + .get(class) + .context("type name was not cached.")? + ) + } + Type::Struct(class) => self + .type_name_cache + .get(class) + .context("type name was not cached.")? + .clone(), + }; + + Ok(type_name) + } + + fn generate_enum(&self, enum0: &Enum) -> anyhow::Result { + let name = self + .type_name_cache + .get(&enum0.obj_ref) + .context("enum name was not previously canonicalized and cached.")?; + + let variants = enum0.values.iter().map(|(&value, name)| { + let name = canonicalize_name(&name); + quote! { + #name = #value, + } + }); + + let tokens = quote! { + #[repr(u8)] + #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] + pub enum #name { + #(#variants)* + } + }; + + Ok(tokens) + } + + /// returns a tuple of: + /// - the type definition of the type as a TokenStream + /// - the impls for that type, like Clone, AsUObject, AsPtr and StaticClass + fn generate_object( + &self, + class: &Class, + name: &str, + ) -> anyhow::Result<(TokenStream, TokenStream)> { + let typedef = quote! { + #[derive(Eq, PartialEq, Copy, Clone)] + pub struct #name(pub ::core::ptr::NonNull); + }; + + let static_class_impl: TokenStream = Self::generate_find_object(name); + + let impls = quote! 
{ + impl AsUObject for #name { + fn as_uobject(&self) -> UObject { + UObject(self.0) + } + + fn from_uobject(obj: &UObject) -> Self { + Self(obj.0) + } + } + + impl AsPtr for #name { + fn as_ptr(&self) -> *const u8 { + unsafe { self.0.as_ref().get() as _ } + } + + fn as_mut_ptr(&self) -> *mut u8 { + unsafe { self.0.as_ref().get() as _ } + } + } + + impl StaticClass for #name { + fn get_static_class() -> Option { + let class: Option = #static_class_impl; + class + } + } + }; + + Ok((typedef, impls)) + } + + /// returns a tuple of: + /// - the type definition of the type as a TokenStream + /// - the impls for that type, like Clone, AsUObject, AsPtr and StaticClass + fn generate_struct( + &self, + class: &Class, + name: &str, + ctor: Option, + ) -> anyhow::Result<(TokenStream, TokenStream)> { + let size = class.size; + let typedef = quote! { + pub struct #name(pub ::core::cell::UnsafeCell); + }; + + let impls = quote! { + impl Eq for #name {} + impl PartialEq for #name { + fn eq(&self, other: &Self) -> bool { + unsafe {(&*self.0.get()).eq(&*other.0.get())} + } + } + + impl Clone for #name { + fn clone(&self) -> Self { + Self(::core::cell::UnsafeCell::new(unsafe {&*self.0.get()}.clone())) + } + } + + impl AsPtr for #name { + fn as_ptr(&self) -> *const u8 { + self.0.get().cast() + } + + fn as_mut_ptr(&self) -> *mut u8 { + self.0.get().cast() + } + } + + impl #name { + pub fn zeroed() -> Self { + unsafe { + ::core::mem::MaybeUninit::::zeroed().assume_init() + } + } + + ctor + } + + }; + + Ok((typedef, impls)) + } + + /// returns a tuple of: + /// - all of the params struct definitions + /// - all of the methods. 
+ fn generate_struct_methods( + &self, + class: &Class, + name: &str, + ) -> anyhow::Result<(Vec, Vec)> { + let methods = class + .methods + .iter() + .map(|method| self.generate_method(name, method)) + .collect::, _>>()?; + + let (params, methods) = methods.into_iter().unzip::<_, _, Vec<_>, Vec<_>>(); + + Ok((params, methods)) + } + + /// returns a tuple of: + /// - the definition of the params struct + /// - the method wrapper. + fn generate_method( + &self, + struct_name: &str, + method: &ClassMethod, + ) -> anyhow::Result<(TokenStream, TokenStream)> { + let method_name = canonicalize_name(&method.name); + + let parameters = method + .parameters + .iter() + .map(|parameter| { + let name = canonicalize_name(¶meter.name); + let type_name = self.type_name(¶meter.ty)?; + + anyhow::Ok((parameter, name, type_name)) + }) + .collect::, _>>()?; + + let all_params = parameters + .iter() + .map(|(param, name, ty)| (param, quote! {#name: #ty})) + .collect::>(); + + let params = all_params + .iter() + .filter(|(param, _)| { + param.is_param() || (!param.is_return_param() && param.is_const_param()) + }) + .map(|(_, tokens)| tokens.clone()); + + let all_params = all_params.iter().map(|(_, tokens)| tokens.clone()); + + let init_params = parameters.iter().map(|(_, name, _)| { + quote! {params.#name = #name;} + }); + + let (return_type, handle_return) = { + let (names, types) = parameters + .iter() + .filter(|(param, _, _)| { + param.is_return_param() || (param.is_out_param() && !param.is_const_param()) + }) + .map(|(_, name, ty)| { + ( + quote! { + #name + }, + quote! { + #ty + }, + ) + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + ( + quote! { + (#(#types),*) + }, + quote! { + (#(params.#names),*) + }, + ) + }; + + let find_function = Self::generate_find_object(&method.full_name); + let params_type = format_ident!("{struct_name}{method_name}Params"); + + let params_def = quote! 
{ + #[repr(C)] + #[derive(Debug)] + pub struct #params_type { + #(pub #all_params),* + } + }; + + let method_def = quote! { + fn #method_name(&self, #(#params),*) -> #return_type { + let mut func: UFunction = {#find_function}.expect("function '#full_name' not found."); + let mut params = #params_type::zeroed(); + + #(#init_params + )* + + let flags = *func.function_flags(); + process_event(self.as_uobject(), func, &mut params); + func.set_function_flags(flags); + + #handle_return + } + }; + + Ok((params_def, method_def)) + } + + /// generates getter, setter and optionally mut_getter for the field, handles bitset booleans. + fn generate_field_accessors( + &self, + field: &ClassField, + field_name: &Cow, + type_name: &String, + ) -> TokenStream { + let setter = format_ident!("set_{}", field_name); + let getter = format_ident!("get_{}", field_name); + let mut_getter = format_ident!("mut_{}", field_name); + + let offset = field.offset; + + let (getter, setter, mut_getter) = match field.ty { + Type::Primitive(PrimitiveType::Bool { + byte_mask, + field_mask, + .. + }) => { + let shift = field_mask.trailing_zeros(); + + let getter = quote! { + fn #getter(&self) -> bool { + unsafe { + *self.as_ptr().offset(#offset) & (1u8 << #shift) != 0 + } + } + }; + + let setter = quote! { + fn #setter(&mut self, #field_name: bool) -> () { + unsafe { + if #field_name { + *self.as_mut_ptr().offset(#offset) |= (#field_name as u8) << shift; + } else { + *self.as_mut_ptr().offset(#offset) &= !((#field_name as u8) << shift); + } + } + } + }; + + (getter, setter, None) + } + _ => { + let getter = quote! { + fn #getter(&self) -> &#type_name { + unsafe {&*self.as_ptr().offset(#offset).cast()} + } + }; + let setter = quote! { + fn #setter(&mut self, #field_name: #type_name) { + *unsafe {&*self.as_ptr().offset(#offset).cast()} = #field_name; + } + }; + let mut_getter = quote! 
{ + fn #mut_getter(&mut self) -> &mut #type_name { + unsafe {&mut *self.as_mut_ptr().offset(#offset).cast()} + } + + }; + + (getter, setter, Some(mut_getter)) + } + }; + + quote! { + #getter + + #setter + + #mut_getter + } + } + + fn generate_struct_ctor( + &self, + class: &Class, + name: &str, + fields: &Vec<(&ClassField, Cow, String)>, + ) -> TokenStream { + let fields_defs = fields.iter().map(|(_, name, ty)| quote! {#name: #ty}); + + let this_field_asignments = fields.iter().map(|(_, name, ty)| { + let setter = format_ident!("set_{}", name); + quote! {this.setter(#name);} + }); + + // FIXME: handle super struct fields aswell, ARK doesnt seem to have those anyways. + + quote! { + pub fn new(#(#fields_defs),*) -> Self { + let mut this = Self::zeroed(); + + #(#this_field_asignments)* + + this + } + } + } + + /// returns a tokenstream with the accessor trait definition and implementation, + /// as well as optionally a constructor for UScriptStructs + fn generate_struct_fields( + &self, + class: &Class, + name: &str, + ) -> anyhow::Result<(TokenStream, Option)> { + let fields = class + .fields + .iter() + .map(|field| { + let name = canonicalize_name(&field.name); + let ty = self.type_name(&field.ty)?; + + anyhow::Ok((field, name, ty)) + }) + .collect::, _>>()?; + + let ctor = if class.kind == StructKind::Struct { + Some(self.generate_struct_ctor(class, name, &fields)) + } else { + None + }; + + let field_accessors = fields + .iter() + .map(|(field, name, ty)| self.generate_field_accessors(field, name, ty)); + + let fields_trait = format_ident!("{name}Fields"); + + let fields_trait = quote! { + pub trait #fields_trait: AsPtr { + #(#field_accessors)* + } + + impl #fields_trait for #name {} + }; + + Ok((fields_trait, ctor)) + } + + fn generate_find_object(name: &str) -> TokenStream { + quote! 
{ + static OBJECT: ::once_cell::sync::OnceCell<::core::option::Option> = ::once_cell::sync::OnceCell::new(); + OBJECT.get_or_init(|| { + match find_object(::obfstr::obfstr!("#name")) { + object @ Some(_) => {object}, + None => { + ::log::error!("{}", obfstr::obfstr!("static object {#name} not found!")); + } + } + }) + .map(|object| unsafe {object.cast()}) + } + } + + fn generate_class(&self, class: &Class) -> anyhow::Result { + let name = self + .type_name_cache + .get(&class.obj_ref) + .context("enum name was not previously canonicalized and cached.")?; + + let (field_trait, ctor) = self.generate_struct_fields(class, name)?; + + let (typedef, impls) = match class.kind { + StructKind::Object | StructKind::Actor => self.generate_object(class, name)?, + StructKind::Struct => self.generate_struct(class, name, ctor)?, + }; + + quote! { + #[repr(transparent)] + #[derive(Debug)] + #typedef + + unsafe impl Send for #name {} + unsafe impl Sync for #name {} + + #impls + + #field_trait + }; + todo!() + } + + fn generate_package(&self, pkg: &ProcessedPackage) -> anyhow::Result<()> { + // TODO: canonicalize_name(&pkg.name); + let pkg_name = "PACKAGE_NAME_PLACEHOLDER".to_string(); + + for (id, ty) in &pkg.types { + let name = self + .type_name_cache + .get(id) + .expect("type name was not cached."); + + let tokens = match ty { + UnrealType::Class(class) + | UnrealType::Actor(class) + | UnrealType::Struct(class) => self.generate_class(class)?, + UnrealType::Enum(enum0) => self.generate_enum(enum0)?, + }; + } + + quote! { + #[cfg(feature = "#pkg_name")] + pub mod #pkg_name { + #![allow(dead_code, unused_imports, non_snake_case, non_camel_case_types)] + + } + }; + + todo!() + } + } +}