initial commit (already containing way too much) of sdk builder

sdk-builder builds the SDK from the serialized RON file.
Initially only Rust will be supported, but C++ and even C, Kotlin or Zig will be easy to add.
This commit is contained in:
Janis 2023-06-24 01:02:42 +02:00
parent 2ccddaf20d
commit b4492dbfd5
3 changed files with 730 additions and 1 deletions

View file

@ -1,3 +1,3 @@
[workspace]
members = ["sdk-serializer", "unreal-sdk", "sdk-generator", "pdb-helper"]
members = ["sdk-serializer", "unreal-sdk", "sdk-generator", "pdb-helper", "sdk-builder"]
resolver = "2"

17
sdk-builder/Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "sdk-builder"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
log = "0.4.0"
anyhow = "1.0"
env_logger = "0.10.0"
itertools = "0.11.0"
unreal-sdk = {path = "../unreal-sdk"}
quote = "1.0.28"
proc-macro2 = "1.0.60"

712
sdk-builder/src/main.rs Normal file
View file

@ -0,0 +1,712 @@
use std::{borrow::Cow, collections::BTreeMap};
use unreal_sdk::sdk::repr::ObjectRef;
/// Program entry point; for now it only prints a placeholder greeting.
fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
}
/// Outcome of a single splitting step performed by `split_at_illegal_char`.
struct SplitResult<'a> {
/// Text in front of the first disallowed character; `None` when that text is empty.
start: Option<&'a str>,
/// Number of `_` characters that `into_valid` writes after `start`;
/// `0` when the input contained no disallowed character.
middle: usize,
/// Input that still has to be processed by `into_valid` (it may contain further
/// disallowed characters); `None` when nothing is left.
end: Option<&'a str>,
}
impl<'a> SplitResult<'a> {
pub fn is_valid(&self) -> bool {
self.start.is_some() && self.middle == 0 && self.end.is_none()
}
pub fn into_valid(self, disallowed_tokens: &[char]) -> Cow<'a, str> {
if self.is_valid() {
Cow::Borrowed(self.start.unwrap())
} else {
let mut valid = self.start.map(|s| s.to_string()).unwrap_or_default();
valid.extend(core::iter::repeat('_').take(self.middle));
match self.end {
Some(end) => {
valid.push_str(
&split_at_illegal_char(end, disallowed_tokens)
.into_valid(disallowed_tokens),
);
}
None => {}
}
Cow::Owned(valid)
}
}
}
/// Splits `input` at the first run of disallowed characters.
///
/// The result holds the text before the run (`start`), the number of
/// disallowed characters in the run (`middle`) and the text following the run
/// (`end`). When `input` contains no disallowed character, `middle` is `0`
/// and `end` is `None`.
///
/// All slicing happens at positions reported by `str::find`, so disallowed
/// characters that occupy more than one byte in UTF-8 are handled without
/// panicking, and `middle` counts characters rather than bytes.
fn split_at_illegal_char<'a>(input: &'a str, disallowed_tokens: &[char]) -> SplitResult<'a> {
    let is_illegal = |c: char| disallowed_tokens.contains(&c);

    let run_start = match input.find(is_illegal) {
        Some(pos) => pos,
        None => {
            return SplitResult {
                start: empty_or_some(input),
                middle: 0,
                end: None,
            }
        }
    };

    let start = empty_or_some(&input[..run_start]);
    // `run` begins with the first disallowed character.
    let run = &input[run_start..];

    match run.find(|c: char| !is_illegal(c)) {
        // The run is followed by at least one allowed character.
        Some(run_len) => SplitResult {
            start,
            middle: run[..run_len].chars().count(),
            end: empty_or_some(&run[run_len..]),
        },
        // Everything up to the end of the input is disallowed.
        None => SplitResult {
            start,
            middle: run.chars().count(),
            end: None,
        },
    }
}
/// Produces an identifier-friendly version of `name`.
///
/// Every character listed in `disallowed_tokens` is replaced by `_`. The
/// result is additionally prefixed with `_` when it equals one of
/// `disallowed_strs` or when its first character is not alphabetic. Names
/// that need no change are returned borrowed.
fn canonicalize_name<'a>(
    name: &'a str,
    disallowed_tokens: &[char],
    disallowed_strs: &[&str],
) -> Cow<'a, str> {
    let sanitized = split_at_illegal_char(name, disallowed_tokens).into_valid(disallowed_tokens);

    let is_reserved = disallowed_strs
        .iter()
        .any(|word| *word == sanitized.as_ref());
    // An empty name has no first character and is therefore left untouched.
    let has_bad_first_char = sanitized
        .chars()
        .next()
        .map_or(false, |first| !first.is_alphabetic());

    if is_reserved || has_bad_first_char {
        Cow::Owned(format!("_{sanitized}"))
    } else {
        sanitized
    }
}
/// Maps the empty string to `None` and every other string to `Some(s)`.
fn empty_or_some(s: &str) -> Option<&str> {
    Some(s).filter(|text| !text.is_empty())
}
/// Lookup table from object references to canonicalized type names.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type yet — confirm whether it is still needed next to
/// `rust::Builder::type_name_cache`.
pub struct CanonicalNames {
/// canonicalized type names for lookup when handling return types and parameters.
types: BTreeMap<ObjectRef, String>,
}
pub mod rust {
use std::{borrow::Cow, collections::BTreeMap};
use anyhow::Context;
use itertools::Itertools;
use proc_macro2::TokenStream;
use quote::{format_ident, quote, TokenStreamExt};
use unreal_sdk::sdk::repr::{
Class, ClassField, ClassMethod, Enum, ObjectRef, PrimitiveType, ProcessedPackage, Sdk,
StructKind, Type, UnrealType,
};
use crate::split_at_illegal_char;
/// Rust keywords (including reserved ones) that must not be used as
/// generated names.
///
/// NOTE(review): not referenced directly in the visible code; `WORDS` repeats
/// these entries.
const KEYWORDS: [&'static str; 51] = [
"as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
"for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
"return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe",
"use", "where", "while", "async", "await", "dyn", "abstract", "become", "box", "do",
"final", "macro", "override", "priv", "typeof", "unsized", "virtual", "yield", "try",
];
/// Names of Rust primitive types that must not be used as generated names.
///
/// NOTE(review): not referenced directly in the visible code; `WORDS` repeats
/// these entries.
const TYPES: [&'static str; 17] = [
"bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16", "i32",
"i64", "i128", "usize", "isize",
];
/// All entries of `KEYWORDS` followed by all entries of `TYPES`.
/// `canonicalize_name` prefixes a name with `_` when it equals one of these.
const WORDS: [&'static str; 68] = [
"as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
"for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
"return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe",
"use", "where", "while", "async", "await", "dyn", "abstract", "become", "box", "do",
"final", "macro", "override", "priv", "typeof", "unsized", "virtual", "yield", "try",
"bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16", "i32",
"i64", "i128", "usize", "isize",
];
/// Characters that `canonicalize_name` replaces with `_`.
const CHARS: [char; 19] = [
' ', '?', '+', '-', ':', '/', '^', '(', ')', '[', ']', '<', '>', '&', '.', '#', '\'', '"',
'%',
];
/// Generates Rust code, as token streams, for the types described by an `Sdk`.
pub struct Builder {
/// Prefixed, canonicalized Rust type name for every type of `sdk`, keyed by
/// the type's object reference. Filled once in `Builder::new`.
type_name_cache: BTreeMap<ObjectRef, String>,
/// The SDK description that `type_name_cache` was built from.
sdk: Sdk,
}
fn canonicalize_name<'a>(name: &'a str) -> Cow<'a, str> {
let valid = split_at_illegal_char(name, &CHARS).into_valid(&CHARS);
if WORDS.contains(&valid.as_ref()) || valid.starts_with(|c: char| !c.is_alphabetic()) {
Cow::Owned(format!("_{}", &valid))
} else {
valid
}
}
impl Builder {
/// Creates a builder for `sdk`.
///
/// Every type of every package gets its Rust name computed up front: the
/// canonicalized name prefixed with `U` (class), `F` (struct), `A` (actor)
/// or `E` (enum). The names are stored by object reference so later
/// lookups do not have to recompute them.
pub fn new(sdk: Sdk) -> Self {
    let mut type_name_cache = BTreeMap::new();

    for (_, pkg) in sdk.packages.iter() {
        for ty in pkg.types.values() {
            let (prefix, canonical) = match ty {
                UnrealType::Class(class) => ('U', canonicalize_name(&class.name)),
                UnrealType::Struct(class) => ('F', canonicalize_name(&class.name)),
                UnrealType::Actor(class) => ('A', canonicalize_name(&class.name)),
                UnrealType::Enum(class) => ('E', canonicalize_name(&class.name)),
            };
            type_name_cache.insert(ty.obj_ref(), format!("{prefix}{canonical}"));
        }
    }

    Self {
        type_name_cache,
        sdk,
    }
}
fn type_name(&self, ty: &Type) -> anyhow::Result<String> {
let type_name = match ty {
Type::Ptr(inner) | Type::Ref(inner) => {
format!("Option<NonNull<{}>>", self.type_name(&inner)?)
}
Type::WeakPtr(inner) => {
format!(
"TWeakObjectPtr<{}>",
self.type_name_cache
.get(inner)
.context("type name was not cached.")?
)
}
Type::SoftPtr(inner) => {
format!(
"TSoftObjectPtr<{}>",
self.type_name_cache
.get(inner)
.context("type name was not cached.")?
)
}
Type::LazyPtr(inner) => {
format!(
"TLazyObjectPtr<{}>",
self.type_name_cache
.get(inner)
.context("type name was not cached.")?
)
}
Type::AssetPtr(inner) => format!(
"TAssetPtr<{}>",
self.type_name_cache
.get(inner)
.context("type name was not cached.")?
),
Type::Array(inner) => format!("TArray<{}>", self.type_name(&inner)?),
Type::Primitive(prim) => {
format!("{prim}")
}
Type::RawArray { ty, len } => {
format!("[{}; {}]", self.type_name(&ty)?, len)
}
Type::Name => "FName".to_string(),
Type::String => "FString".to_string(),
Type::Text => "FText".to_string(),
Type::Enum {
underlying,
enum_type,
} => self
.type_name_cache
.get(enum_type)
.context("type name was not cached.")?
.clone(),
Type::Class(class) => {
format!(
"::core::option::Option<{}>",
self.type_name_cache
.get(class)
.context("type name was not cached.")?
)
}
Type::Struct(class) => self
.type_name_cache
.get(class)
.context("type name was not cached.")?
.clone(),
};
Ok(type_name)
}
fn generate_enum(&self, enum0: &Enum) -> anyhow::Result<TokenStream> {
let name = self
.type_name_cache
.get(&enum0.obj_ref)
.context("enum name was not previously canonicalized and cached.")?;
let variants = enum0.values.iter().map(|(&value, name)| {
let name = canonicalize_name(&name);
quote! {
#name = #value,
}
});
let tokens = quote! {
#[repr(u8)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum #name {
#(#variants)*
}
};
Ok(tokens)
}
/// returns a tuple of:
/// - the type definition of the type as a TokenStream
/// - the impls for that type, like Clone, AsUObject, AsPtr and StaticClass
fn generate_object(
&self,
class: &Class,
name: &str,
) -> anyhow::Result<(TokenStream, TokenStream)> {
let typedef = quote! {
#[derive(Eq, PartialEq, Copy, Clone)]
pub struct #name(pub ::core::ptr::NonNull<u8>);
};
let static_class_impl: TokenStream = Self::generate_find_object(name);
let impls = quote! {
impl AsUObject for #name {
fn as_uobject(&self) -> UObject {
UObject(self.0)
}
fn from_uobject(obj: &UObject) -> Self {
Self(obj.0)
}
}
impl AsPtr for #name {
fn as_ptr(&self) -> *const u8 {
unsafe { self.0.as_ref().get() as _ }
}
fn as_mut_ptr(&self) -> *mut u8 {
unsafe { self.0.as_ref().get() as _ }
}
}
impl StaticClass for #name {
fn get_static_class() -> Option<UClass> {
let class: Option<UClass> = #static_class_impl;
class
}
}
};
Ok((typedef, impls))
}
/// returns a tuple of:
/// - the type definition of the type as a TokenStream
/// - the impls for that type, like Clone, AsUObject, AsPtr and StaticClass
fn generate_struct(
&self,
class: &Class,
name: &str,
ctor: Option<TokenStream>,
) -> anyhow::Result<(TokenStream, TokenStream)> {
let size = class.size;
let typedef = quote! {
pub struct #name(pub ::core::cell::UnsafeCell<u8; #size>);
};
let impls = quote! {
impl Eq for #name {}
impl PartialEq for #name {
fn eq(&self, other: &Self) -> bool {
unsafe {(&*self.0.get()).eq(&*other.0.get())}
}
}
impl Clone for #name {
fn clone(&self) -> Self {
Self(::core::cell::UnsafeCell::new(unsafe {&*self.0.get()}.clone()))
}
}
impl AsPtr for #name {
fn as_ptr(&self) -> *const u8 {
self.0.get().cast()
}
fn as_mut_ptr(&self) -> *mut u8 {
self.0.get().cast()
}
}
impl #name {
pub fn zeroed() -> Self {
unsafe {
::core::mem::MaybeUninit::<Self>::zeroed().assume_init()
}
}
ctor
}
};
Ok((typedef, impls))
}
/// Generates the wrappers for every method of `class`.
///
/// returns a tuple of:
/// - all of the params struct definitions
/// - all of the methods.
///
/// Both lists are in the order of `class.methods`; the first method that
/// fails to generate aborts the whole call with its error.
fn generate_struct_methods(
    &self,
    class: &Class,
    name: &str,
) -> anyhow::Result<(Vec<TokenStream>, Vec<TokenStream>)> {
    let mut params_defs = Vec::new();
    let mut method_defs = Vec::new();

    for method in class.methods.iter() {
        let (params_def, method_def) = self.generate_method(name, method)?;
        params_defs.push(params_def);
        method_defs.push(method_def);
    }

    Ok((params_defs, method_defs))
}
/// Generates the wrapper for one method of the type called `struct_name`.
///
/// returns a tuple of:
/// - the definition of the params struct
/// - the method wrapper.
///
/// The params struct contains every parameter. The wrapper takes only the
/// input parameters as arguments, copies exactly those into the params
/// struct, calls `process_event` and returns the return/out parameters as a
/// tuple.
///
/// Names are converted to identifiers and type names are parsed into token
/// streams before interpolation; interpolating the strings directly would
/// emit string literals. The "not found" message is built here, because
/// `quote!` does not interpolate inside string literals.
///
/// # Errors
/// Fails when a parameter type cannot be named or its name does not lex as
/// Rust tokens.
///
/// # Panics
/// `format_ident!` panics when a canonicalized name is still not a valid
/// identifier.
fn generate_method(
    &self,
    struct_name: &str,
    method: &ClassMethod,
) -> anyhow::Result<(TokenStream, TokenStream)> {
    let method_name = format_ident!("{}", canonicalize_name(&method.name));

    let parameters = method
        .parameters
        .iter()
        .map(|parameter| {
            let name = format_ident!("{}", canonicalize_name(&parameter.name));
            let ty = self
                .type_name(&parameter.ty)?
                .parse::<TokenStream>()
                .map_err(|err| {
                    anyhow::anyhow!("invalid type name for parameter `{}`: {}", name, err)
                })?;

            anyhow::Ok((parameter, name, ty))
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Fields of the params struct: every parameter, inputs and outputs alike.
    let all_params = parameters
        .iter()
        .map(|(_, name, ty)| quote! {#name: #ty});

    // Parameters the caller has to pass in.
    let inputs = || {
        parameters.iter().filter(|(param, _, _)| {
            param.is_param() || (!param.is_return_param() && param.is_const_param())
        })
    };

    let params = inputs().map(|(_, name, ty)| quote! {#name: #ty});

    // Only inputs exist as arguments of the wrapper, so only they can be
    // copied into the params struct.
    let init_params = inputs().map(|(_, name, _)| {
        quote! {params.#name = #name;}
    });

    let (names, types) = parameters
        .iter()
        .filter(|(param, _, _)| {
            param.is_return_param() || (param.is_out_param() && !param.is_const_param())
        })
        .map(|(_, name, ty)| (name, ty))
        .unzip::<_, _, Vec<_>, Vec<_>>();

    let return_type = quote! {
        (#(#types),*)
    };
    let handle_return = quote! {
        (#(params.#names),*)
    };

    let full_name: &str = &method.full_name;
    let find_function = Self::generate_find_object(full_name);
    let not_found = format!("function '{}' not found.", full_name);

    let params_type = format_ident!("{}{}Params", struct_name, method_name);

    let params_def = quote! {
        #[repr(C)]
        #[derive(Debug)]
        pub struct #params_type {
            #(pub #all_params),*
        }
    };

    // NOTE(review): the generated body calls `zeroed()` on the params struct,
    // which is not defined by `params_def` — presumably still to be generated
    // or provided elsewhere; confirm.
    let method_def = quote! {
        fn #method_name(&self, #(#params),*) -> #return_type {
            let mut func: UFunction = {#find_function}.expect(#not_found);
            let mut params = #params_type::zeroed();
            #(#init_params)*

            let flags = *func.function_flags();
            process_event(self.as_uobject(), func, &mut params);
            func.set_function_flags(flags);

            #handle_return
        }
    };

    Ok((params_def, method_def))
}
/// generates getter, setter and optionally mut_getter for the field, handles bitset booleans.
///
/// For a boolean field the getter tests, and the setter sets or clears, the
/// bit given by the lowest set bit of the field mask; no `mut_` accessor is
/// generated. Every other field gets `get_`, `set_` and `mut_` accessors that
/// reinterpret the bytes at the field's offset as `type_name`.
///
/// Fixes over the previous version of the generated code:
/// - the shift amount is interpolated into the boolean setter,
/// - clearing a boolean really clears the bit (`&= !(1 << shift)`),
/// - the generic setter writes through the mutable pointer,
/// - names and types are emitted as identifiers/tokens, not string literals.
///
/// # Panics
/// Panics when `field_name` is not a valid identifier or `type_name` does not
/// lex as Rust tokens. Both are produced by this builder
/// (`canonicalize_name`, `type_name`), so this indicates a bug in the
/// builder.
fn generate_field_accessors(
    &self,
    field: &ClassField,
    field_name: &Cow<str>,
    type_name: &String,
) -> TokenStream {
    let setter = format_ident!("set_{}", field_name);
    let getter = format_ident!("get_{}", field_name);
    let mut_getter = format_ident!("mut_{}", field_name);
    let value = format_ident!("{}", field_name);

    // NOTE(review): `#offset` is emitted as a suffixed literal of the field's
    // integer type, while `pointer::offset` takes an `isize` — confirm that
    // `ClassField::offset` fits, or switch the generated code to `add`.
    let offset = field.offset;

    match field.ty {
        Type::Primitive(PrimitiveType::Bool { field_mask, .. }) => {
            let shift = field_mask.trailing_zeros();

            quote! {
                fn #getter(&self) -> bool {
                    unsafe {
                        *self.as_ptr().offset(#offset) & (1u8 << #shift) != 0
                    }
                }

                fn #setter(&mut self, #value: bool) -> () {
                    unsafe {
                        if #value {
                            *self.as_mut_ptr().offset(#offset) |= 1u8 << #shift;
                        } else {
                            *self.as_mut_ptr().offset(#offset) &= !(1u8 << #shift);
                        }
                    }
                }
            }
        }
        _ => {
            let ty = type_name
                .parse::<TokenStream>()
                .expect("type names rendered by the builder lex as Rust tokens");

            quote! {
                fn #getter(&self) -> &#ty {
                    unsafe {&*self.as_ptr().offset(#offset).cast()}
                }

                fn #setter(&mut self, #value: #ty) {
                    unsafe {
                        *self.as_mut_ptr().offset(#offset).cast::<#ty>() = #value;
                    }
                }

                fn #mut_getter(&mut self) -> &mut #ty {
                    unsafe {&mut *self.as_mut_ptr().offset(#offset).cast()}
                }
            }
        }
    }
}
/// Generates a `new(..)` constructor that takes one argument per entry of
/// `fields`, starts from `Self::zeroed()` and stores each argument through
/// the field's `set_<name>` accessor.
///
/// The setter identifier is interpolated into the generated call, and names
/// and types are emitted as identifiers/tokens rather than string literals.
///
/// # Panics
/// Panics when a field name is not a valid identifier or a type name does not
/// lex as Rust tokens. Both are produced by this builder, so this indicates
/// a bug in the builder.
fn generate_struct_ctor(
    &self,
    class: &Class,
    name: &str,
    fields: &Vec<(&ClassField, Cow<str>, String)>,
) -> TokenStream {
    let fields_defs = fields.iter().map(|(_, name, ty)| {
        let arg = format_ident!("{}", name);
        let ty = ty
            .parse::<TokenStream>()
            .expect("type names rendered by the builder lex as Rust tokens");
        quote! {#arg: #ty}
    });

    let this_field_assignments = fields.iter().map(|(_, name, _)| {
        let arg = format_ident!("{}", name);
        let setter = format_ident!("set_{}", name);
        quote! {this.#setter(#arg);}
    });

    // FIXME: handle super struct fields as well; ARK doesn't seem to have those anyway.
    quote! {
        pub fn new(#(#fields_defs),*) -> Self {
            let mut this = Self::zeroed();
            #(#this_field_assignments)*
            this
        }
    }
}
/// returns a tokenstream with the accessor trait definition and implementation,
/// as well as optionally a constructor for UScriptStructs
///
/// The trait is called `<name>Fields`, requires `AsPtr` and carries the
/// accessors of every field as provided methods; it is implemented for the
/// type `name`. A constructor is only generated when `class.kind` is
/// `StructKind::Struct`.
///
/// `name` is converted to an identifier for the `impl`; interpolating the
/// `&str` directly would emit a string literal.
///
/// # Errors
/// Fails when the type of a field cannot be named.
///
/// # Panics
/// `format_ident!` panics when `name` is not a valid identifier.
fn generate_struct_fields(
    &self,
    class: &Class,
    name: &str,
) -> anyhow::Result<(TokenStream, Option<TokenStream>)> {
    let fields = class
        .fields
        .iter()
        .map(|field| {
            let name = canonicalize_name(&field.name);
            let ty = self.type_name(&field.ty)?;

            anyhow::Ok((field, name, ty))
        })
        .collect::<Result<Vec<_>, _>>()?;

    let ctor = if class.kind == StructKind::Struct {
        Some(self.generate_struct_ctor(class, name, &fields))
    } else {
        None
    };

    let field_accessors = fields
        .iter()
        .map(|(field, name, ty)| self.generate_field_accessors(field, name, ty));

    let type_ident = format_ident!("{}", name);
    let fields_trait = format_ident!("{}Fields", name);
    let fields_trait = quote! {
        pub trait #fields_trait: AsPtr {
            #(#field_accessors)*
        }

        impl #fields_trait for #type_ident {}
    };

    Ok((fields_trait, ctor))
}
/// Generates statements that look up the object called `name` once, cache
/// the result in a `static` `OnceCell` and evaluate to the cached object cast
/// to the type expected at the use site.
///
/// The snippet starts with a `static` item, so callers have to place it
/// inside a block (`{ ... }`) when they need it as an expression.
///
/// `name` is interpolated as a string literal token, and the log message is
/// built here: `quote!` does not interpolate inside string literals, so the
/// previous `"#name"` was emitted verbatim. The `None` arm now evaluates to
/// `None` after logging, so that both match arms have the same type.
fn generate_find_object(name: &str) -> TokenStream {
    let not_found = format!("static object {name} not found!");

    quote! {
        static OBJECT: ::once_cell::sync::OnceCell<::core::option::Option<UObject>> = ::once_cell::sync::OnceCell::new();

        OBJECT.get_or_init(|| {
            match find_object(::obfstr::obfstr!(#name)) {
                object @ Some(_) => {object},
                None => {
                    ::log::error!("{}", ::obfstr::obfstr!(#not_found));
                    None
                }
            }
        })
        .map(|object| unsafe {object.cast()})
    }
}
fn generate_class(&self, class: &Class) -> anyhow::Result<TokenStream> {
let name = self
.type_name_cache
.get(&class.obj_ref)
.context("enum name was not previously canonicalized and cached.")?;
let (field_trait, ctor) = self.generate_struct_fields(class, name)?;
let (typedef, impls) = match class.kind {
StructKind::Object | StructKind::Actor => self.generate_object(class, name)?,
StructKind::Struct => self.generate_struct(class, name, ctor)?,
};
quote! {
#[repr(transparent)]
#[derive(Debug)]
#typedef
unsafe impl Send for #name {}
unsafe impl Sync for #name {}
#impls
#field_trait
};
todo!()
}
fn generate_package(&self, pkg: &ProcessedPackage) -> anyhow::Result<()> {
// TODO: canonicalize_name(&pkg.name);
let pkg_name = "PACKAGE_NAME_PLACEHOLDER".to_string();
for (id, ty) in &pkg.types {
let name = self
.type_name_cache
.get(id)
.expect("type name was not cached.");
let tokens = match ty {
UnrealType::Class(class)
| UnrealType::Actor(class)
| UnrealType::Struct(class) => self.generate_class(class)?,
UnrealType::Enum(enum0) => self.generate_enum(enum0)?,
};
}
quote! {
#[cfg(feature = "#pkg_name")]
pub mod #pkg_name {
#![allow(dead_code, unused_imports, non_snake_case, non_camel_case_types)]
}
};
todo!()
}
}
}